-- Patch overview (summarises only what the hunks below show; the XML markup of hive-site.xml appears to have been stripped from this copy):
--   * data/conf/perf-reg/tez/hive-site.xml: hive.support.concurrency changes from false to true; hive.txn.manager (org.apache.hadoop.hive.ql.lockmgr.DbTxnManager), hive.merge.nway.joins (false) and hive.stats.fetch.column.stats (true) are added.
--   * data/scripts/q_perf_test_init.sql: each "create external table" becomes "create table", and "transactional"="true" is added to the ORC tblproperties.
-- NOTE(review): the unchanged hive.support.concurrency description still refers to a zookeeper-backed default lock manager although this patch selects DbTxnManager; confirm whether that description should be refreshed.
diff --git a/data/conf/perf-reg/tez/hive-site.xml b/data/conf/perf-reg/tez/hive-site.xml index 78a5481e03..7d38be04ca 100644 --- a/data/conf/perf-reg/tez/hive-site.xml +++ b/data/conf/perf-reg/tez/hive-site.xml @@ -168,10 +168,15 @@ hive.support.concurrency - false + true Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks. + + hive.txn.manager + org.apache.hadoop.hive.ql.lockmgr.DbTxnManager + + fs.pfile.impl org.apache.hadoop.fs.ProxyLocalFileSystem @@ -292,4 +297,14 @@ 99 + + hive.merge.nway.joins + false + + + + hive.stats.fetch.column.stats + true + + diff --git a/data/scripts/q_perf_test_init.sql b/data/scripts/q_perf_test_init.sql index d27215b4cb..020e2d0c49 100644 --- a/data/scripts/q_perf_test_init.sql +++ b/data/scripts/q_perf_test_init.sql @@ -1,5 +1,5 @@ drop table if exists call_center; -create external table call_center +create table call_center ( cc_call_center_sk int, cc_call_center_id string, @@ -34,10 +34,10 @@ create external table call_center cc_tax_percentage decimal(5,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists catalog_page; -create external table catalog_page +create table catalog_page ( cp_catalog_page_sk int, cp_catalog_page_id string, @@ -50,11 +50,11 @@ create external table catalog_page cp_type string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists catalog_returns; -create external table catalog_returns +create table catalog_returns ( cr_returned_date_sk int, cr_returned_time_sk int, @@ -85,11 +85,11 @@ create external table catalog_returns cr_net_loss decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC 
tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists catalog_sales; -create external table catalog_sales +create table catalog_sales ( cs_sold_date_sk int, cs_sold_time_sk int, @@ -127,11 +127,11 @@ create external table catalog_sales cs_net_profit decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists customer; -create external table customer +create table customer ( c_customer_sk int, c_customer_id string, @@ -153,11 +153,11 @@ create external table customer c_last_review_date string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists customer_address; -create external table customer_address +create table customer_address ( ca_address_sk int, ca_address_id string, @@ -174,11 +174,11 @@ create external table customer_address ca_location_type string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists customer_demographics; -create external table customer_demographics +create table customer_demographics ( cd_demo_sk int, cd_gender string, @@ -191,11 +191,11 @@ create external table customer_demographics cd_dep_college_count int ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists date_dim; -create external table date_dim +create table date_dim ( d_date_sk int, d_date_id string, @@ -227,11 +227,11 @@ create external table date_dim d_current_year string ) row format delimited fields 
terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists household_demographics; -create external table household_demographics +create table household_demographics ( hd_demo_sk int, hd_income_band_sk int, @@ -240,22 +240,22 @@ create external table household_demographics hd_vehicle_count int ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists income_band; -create external table income_band +create table income_band ( ib_income_band_sk int, ib_lower_bound int, ib_upper_bound int ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists inventory; -create external table inventory +create table inventory ( inv_date_sk int, inv_item_sk int, @@ -263,11 +263,11 @@ create external table inventory inv_quantity_on_hand int ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists item; -create external table item +create table item ( i_item_sk int, i_item_id string, @@ -293,11 +293,11 @@ create external table item i_product_name string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists promotion; -create external table promotion +create table promotion ( p_promo_sk int, p_promo_id string, @@ -320,22 +320,22 @@ create external table promotion p_discount_active string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC 
tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists reason; -create external table reason +create table reason ( r_reason_sk int, r_reason_id string, r_reason_desc string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists ship_mode; -create external table ship_mode +create table ship_mode ( sm_ship_mode_sk int, sm_ship_mode_id string, @@ -345,11 +345,11 @@ create external table ship_mode sm_contract string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists store; -create external table store +create table store ( s_store_sk int, s_store_id string, @@ -382,11 +382,11 @@ create external table store s_tax_precentage decimal(5,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists store_returns; -create external table store_returns +create table store_returns ( sr_returned_date_sk int, sr_return_time_sk int, @@ -410,11 +410,11 @@ create external table store_returns sr_net_loss decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists store_sales; -create external table store_sales +create table store_sales ( ss_sold_date_sk int, ss_sold_time_sk int, @@ -441,11 +441,11 @@ create external table store_sales ss_net_profit decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists time_dim; 
-create external table time_dim +create table time_dim ( t_time_sk int, t_time_id string, @@ -459,11 +459,11 @@ create external table time_dim t_meal_time string ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists warehouse; -create external table warehouse +create table warehouse ( w_warehouse_sk int, w_warehouse_id string, @@ -481,11 +481,11 @@ create external table warehouse w_gmt_offset decimal(5,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists web_page; -create external table web_page +create table web_page ( wp_web_page_sk int, wp_web_page_id string, @@ -503,11 +503,11 @@ create external table web_page wp_max_ad_count int ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists web_returns; -create external table web_returns +create table web_returns ( wr_returned_date_sk int, wr_returned_time_sk int, @@ -535,11 +535,11 @@ create external table web_returns wr_net_loss decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists web_sales; -create external table web_sales +create table web_sales ( ws_sold_date_sk int, ws_sold_time_sk int, @@ -577,11 +577,11 @@ create external table web_sales ws_net_profit decimal(7,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); drop table if exists web_site; -create external table web_site +create 
table web_site ( web_site_sk int, web_site_id string, @@ -611,6 +611,6 @@ create external table web_site web_tax_percentage decimal(5,2) ) row format delimited fields terminated by '\t' -STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); diff --git a/data/scripts/q_perf_test_init_constraints.sql b/data/scripts/q_perf_test_init_constraints.sql new file mode 100644 index 0000000000..3b3f503ee4 --- /dev/null +++ b/data/scripts/q_perf_test_init_constraints.sql @@ -0,0 +1,771 @@ +drop table if exists call_center; +create table call_center +( + cc_call_center_sk int, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk int, + cc_open_date_sk int, + cc_name string, + cc_class string, + cc_employees int, + cc_sq_ft int, + cc_hours string, + cc_manager string, + cc_mkt_id int, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division int, + cc_division_name string, + cc_company int, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset decimal(5,2), + cc_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + +drop table if exists catalog_page; +create table catalog_page +( + cp_catalog_page_sk int, + cp_catalog_page_id string, + cp_start_date_sk int, + cp_end_date_sk int, + cp_department string, + cp_catalog_number int, + cp_catalog_page_number int, + cp_description string, + cp_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists catalog_returns; +create table catalog_returns +( + cr_returned_date_sk int, + 
cr_returned_time_sk int, + cr_item_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_order_number int, + cr_return_quantity int, + cr_return_amount decimal(7,2), + cr_return_tax decimal(7,2), + cr_return_amt_inc_tax decimal(7,2), + cr_fee decimal(7,2), + cr_return_ship_cost decimal(7,2), + cr_refunded_cash decimal(7,2), + cr_reversed_charge decimal(7,2), + cr_store_credit decimal(7,2), + cr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists catalog_sales; +create table catalog_sales +( + cs_sold_date_sk int, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_item_sk int, + cs_promo_sk int, + cs_order_number int, + cs_quantity int, + cs_wholesale_cost decimal(7,2), + cs_list_price decimal(7,2), + cs_sales_price decimal(7,2), + cs_ext_discount_amt decimal(7,2), + cs_ext_sales_price decimal(7,2), + cs_ext_wholesale_cost decimal(7,2), + cs_ext_list_price decimal(7,2), + cs_ext_tax decimal(7,2), + cs_coupon_amt decimal(7,2), + cs_ext_ship_cost decimal(7,2), + cs_net_paid decimal(7,2), + cs_net_paid_inc_tax decimal(7,2), + cs_net_paid_inc_ship decimal(7,2), + cs_net_paid_inc_ship_tax decimal(7,2), + cs_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + 
+ +drop table if exists customer; +create table customer +( + c_customer_sk int, + c_customer_id string, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day int, + c_birth_month int, + c_birth_year int, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists customer_address; +create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset decimal(5,2), + ca_location_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists customer_demographics; +create table customer_demographics +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists date_dim; +create table date_dim +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + 
d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists household_demographics; +create table household_demographics +( + hd_demo_sk int, + hd_income_band_sk int, + hd_buy_potential string, + hd_dep_count int, + hd_vehicle_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists income_band; +create table income_band +( + ib_income_band_sk int, + ib_lower_bound int, + ib_upper_bound int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists inventory; +create table inventory +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists item; +create table item +( + i_item_sk int, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price decimal(7,2), + i_wholesale_cost decimal(7,2), + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists promotion; +create table promotion +( + p_promo_sk int, + p_promo_id string, + p_start_date_sk int, + 
p_end_date_sk int, + p_item_sk int, + p_cost decimal(15,2), + p_response_target int, + p_promo_name string, + p_channel_dmail string, + p_channel_email string, + p_channel_catalog string, + p_channel_tv string, + p_channel_radio string, + p_channel_press string, + p_channel_event string, + p_channel_demo string, + p_channel_details string, + p_purpose string, + p_discount_active string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists reason; +create table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists ship_mode; +create table ship_mode +( + sm_ship_mode_sk int, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists store; +create table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset decimal(5,2), + s_tax_precentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists store_returns; +create table 
store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists store_sales; +create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists time_dim; +create table time_dim +( + t_time_sk int, + t_time_id string, + t_time int, + t_hour int, + t_minute int, + t_second int, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists warehouse; +create table warehouse +( + w_warehouse_sk int, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft 
int, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists web_page; +create table web_page +( + wp_web_page_sk int, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag string, + wp_customer_sk int, + wp_url string, + wp_type string, + wp_char_count int, + wp_link_count int, + wp_image_count int, + wp_max_ad_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists web_returns; +create table web_returns +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt decimal(7,2), + wr_return_tax decimal(7,2), + wr_return_amt_inc_tax decimal(7,2), + wr_fee decimal(7,2), + wr_return_ship_cost decimal(7,2), + wr_refunded_cash decimal(7,2), + wr_reversed_charge decimal(7,2), + wr_account_credit decimal(7,2), + wr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists web_sales; +create table web_sales +( + ws_sold_date_sk int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + 
ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost decimal(7,2), + ws_list_price decimal(7,2), + ws_sales_price decimal(7,2), + ws_ext_discount_amt decimal(7,2), + ws_ext_sales_price decimal(7,2), + ws_ext_wholesale_cost decimal(7,2), + ws_ext_list_price decimal(7,2), + ws_ext_tax decimal(7,2), + ws_coupon_amt decimal(7,2), + ws_ext_ship_cost decimal(7,2), + ws_net_paid decimal(7,2), + ws_net_paid_inc_tax decimal(7,2), + ws_net_paid_inc_ship decimal(7,2), + ws_net_paid_inc_ship_tax decimal(7,2), + ws_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +drop table if exists web_site; +create table web_site +( + web_site_sk int, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset decimal(5,2), + web_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB"); + + +-- CONSTRAINTS: primary keys are added first, followed by foreign keys; every statement in this section is declared with disable novalidate rely +alter table customer_address add constraint pk_ca primary key (ca_address_sk) disable novalidate rely; +alter table customer_demographics add constraint pk_cd primary key (cd_demo_sk) disable novalidate rely; +alter table date_dim add constraint pk_dd primary key (d_date_sk) disable 
novalidate rely; +alter table warehouse add constraint pk_w primary key (w_warehouse_sk) disable novalidate rely; +alter table ship_mode add constraint pk_sm primary key (sm_ship_mode_sk) disable novalidate rely; +alter table time_dim add constraint pk_td primary key (t_time_sk) disable novalidate rely; +alter table reason add constraint pk_r primary key (r_reason_sk) disable novalidate rely; +alter table income_band add constraint pk_ib primary key (ib_income_band_sk) disable novalidate rely; +alter table item add constraint pk_i primary key (i_item_sk) disable novalidate rely; +alter table store add constraint pk_s primary key (s_store_sk) disable novalidate rely; +alter table call_center add constraint pk_cc primary key (cc_call_center_sk) disable novalidate rely; +alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely; +alter table web_site add constraint pk_ws primary key (web_site_sk) disable novalidate rely; +alter table store_returns add constraint pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely; +alter table household_demographics add constraint pk_hd primary key (hd_demo_sk) disable novalidate rely; +alter table web_page add constraint pk_wp primary key (wp_web_page_sk) disable novalidate rely; +alter table promotion add constraint pk_p primary key (p_promo_sk) disable novalidate rely; +alter table catalog_page add constraint pk_cp primary key (cp_catalog_page_sk) disable novalidate rely; +-- partition_col case (this marker recurs on later constraints). NOTE(review): presumably flags a constraint whose key includes a column used for partitioning in a partitioned layout of the table, here inv_date_sk; inventory itself is created unpartitioned in this script, so confirm the intent +alter table inventory add constraint pk_in primary key (inv_date_sk, inv_item_sk, inv_warehouse_sk) disable novalidate rely; +alter table catalog_returns add constraint pk_cr primary key (cr_item_sk, cr_order_number) disable novalidate rely; +alter table web_returns add constraint pk_wr primary key (wr_item_sk, wr_order_number) disable novalidate rely; +alter table web_sales add constraint pk_ws2 primary key (ws_item_sk, ws_order_number) disable novalidate rely; +alter table 
catalog_sales add constraint pk_cs primary key (cs_item_sk, cs_order_number) disable novalidate rely; +alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely; + +alter table call_center add constraint cc_d1 foreign key (cc_closed_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table call_center add constraint cc_d2 foreign key (cc_open_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_page add constraint cp_d1 foreign key (cp_end_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_page add constraint cp_d2 foreign key (cp_start_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_cc foreign key (cr_call_center_sk) references call_center (cc_call_center_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_cp foreign key (cr_catalog_page_sk) references catalog_page (cp_catalog_page_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_cs foreign key (cr_item_sk, cr_order_number) references catalog_sales (cs_item_sk, cs_order_number) disable novalidate rely; +alter table catalog_returns add constraint cr_i foreign key (cr_item_sk) references item (i_item_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_r foreign key (cr_reason_sk) references reason (r_reason_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_a1 foreign key (cr_refunded_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_cd1 foreign key (cr_refunded_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_c1 foreign key (cr_refunded_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_hd1 
foreign key (cr_refunded_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +-- partition_col case +alter table catalog_returns add constraint cr_d1 foreign key (cr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_t foreign key (cr_returned_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_a2 foreign key (cr_returning_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_cd2 foreign key (cr_returning_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_c2 foreign key (cr_returning_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_hd2 foreign key (cr_returning_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +-- left disabled; the catalog_returns DDL in this script declares no cr_ship_date_sk column: alter table catalog_returns add constraint cr_d2 foreign key (cr_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_sm foreign key (cr_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; +alter table catalog_returns add constraint cr_w2 foreign key (cr_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_b_a foreign key (cs_bill_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_b_cd foreign key (cs_bill_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_b_c foreign key (cs_bill_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_b_hd foreign key (cs_bill_hdemo_sk) references 
household_demographics (hd_demo_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_cc foreign key (cs_call_center_sk) references call_center (cc_call_center_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_cp foreign key (cs_catalog_page_sk) references catalog_page (cp_catalog_page_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_i foreign key (cs_item_sk) references item (i_item_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_p foreign key (cs_promo_sk) references promotion (p_promo_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_s_a foreign key (cs_ship_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_s_cd foreign key (cs_ship_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_s_c foreign key (cs_ship_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_d1 foreign key (cs_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_s_hd foreign key (cs_ship_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_sm foreign key (cs_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; +-- partition_col case +alter table catalog_sales add constraint cs_d2 foreign key (cs_sold_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_t foreign key (cs_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table catalog_sales add constraint cs_w foreign key (cs_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; +alter table customer add constraint c_a foreign key (c_current_addr_sk) 
references customer_address (ca_address_sk) disable novalidate rely; +alter table customer add constraint c_cd foreign key (c_current_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table customer add constraint c_hd foreign key (c_current_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table customer add constraint c_fsd foreign key (c_first_sales_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table customer add constraint c_fsd2 foreign key (c_first_shipto_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table household_demographics add constraint hd_ib foreign key (hd_income_band_sk) references income_band (ib_income_band_sk) disable novalidate rely; +-- partition_col case +alter table inventory add constraint inv_d foreign key (inv_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table inventory add constraint inv_i foreign key (inv_item_sk) references item (i_item_sk) disable novalidate rely; +alter table inventory add constraint inv_w foreign key (inv_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; +alter table promotion add constraint p_end_date foreign key (p_end_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table promotion add constraint p_i foreign key (p_item_sk) references item (i_item_sk) disable novalidate rely; +alter table promotion add constraint p_start_date foreign key (p_start_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table store add constraint s_close_date foreign key (s_closed_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table store_returns add constraint sr_a foreign key (sr_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table store_returns add constraint sr_cd foreign key (sr_cdemo_sk) references customer_demographics (cd_demo_sk) disable 
novalidate rely; +alter table store_returns add constraint sr_c foreign key (sr_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table store_returns add constraint sr_hd foreign key (sr_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table store_returns add constraint sr_i foreign key (sr_item_sk) references item (i_item_sk) disable novalidate rely; +alter table store_returns add constraint sr_r foreign key (sr_reason_sk) references reason (r_reason_sk) disable novalidate rely; +-- partition_col case +alter table store_returns add constraint sr_ret_d foreign key (sr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table store_returns add constraint sr_t foreign key (sr_return_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table store_returns add constraint sr_s foreign key (sr_store_sk) references store (s_store_sk) disable novalidate rely; +alter table store_returns add constraint sr_ss foreign key (sr_item_sk, sr_ticket_number) references store_sales (ss_item_sk, ss_ticket_number) disable novalidate rely; +alter table store_sales add constraint ss_a foreign key (ss_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table store_sales add constraint ss_cd foreign key (ss_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table store_sales add constraint ss_hd foreign key (ss_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table store_sales add constraint ss_i foreign key (ss_item_sk) references item (i_item_sk) disable novalidate rely; +alter table store_sales add constraint ss_p foreign key (ss_promo_sk) references promotion (p_promo_sk) disable novalidate rely; +-- partition_col case +alter 
table store_sales add constraint ss_d foreign key (ss_sold_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table store_sales add constraint ss_t foreign key (ss_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table store_sales add constraint ss_s foreign key (ss_store_sk) references store (s_store_sk) disable novalidate rely; +alter table web_page add constraint wp_ad foreign key (wp_access_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_page add constraint wp_cd foreign key (wp_creation_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_returns add constraint wr_i foreign key (wr_item_sk) references item (i_item_sk) disable novalidate rely; +alter table web_returns add constraint wr_r foreign key (wr_reason_sk) references reason (r_reason_sk) disable novalidate rely; +alter table web_returns add constraint wr_ref_a foreign key (wr_refunded_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table web_returns add constraint wr_ref_cd foreign key (wr_refunded_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table web_returns add constraint wr_ref_c foreign key (wr_refunded_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table web_returns add constraint wr_ref_hd foreign key (wr_refunded_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +-- partition_col case +alter table web_returns add constraint wr_ret_d foreign key (wr_returned_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_returns add constraint wr_ret_t foreign key (wr_returned_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table web_returns add constraint wr_ret_a foreign key (wr_returning_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table web_returns add 
constraint wr_ret_cd foreign key (wr_returning_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table web_returns add constraint wr_ret_c foreign key (wr_returning_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table web_returns add constraint wr_ret_hd foreign key (wr_returning_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table web_returns add constraint wr_ws foreign key (wr_item_sk, wr_order_number) references web_sales (ws_item_sk, ws_order_number) disable novalidate rely; +alter table web_returns add constraint wr_wp foreign key (wr_web_page_sk) references web_page (wp_web_page_sk) disable novalidate rely; +alter table web_sales add constraint ws_b_a foreign key (ws_bill_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table web_sales add constraint ws_b_cd foreign key (ws_bill_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table web_sales add constraint ws_b_c foreign key (ws_bill_customer_sk) references customer (c_customer_sk) disable novalidate rely; +alter table web_sales add constraint ws_b_hd foreign key (ws_bill_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table web_sales add constraint ws_i foreign key (ws_item_sk) references item (i_item_sk) disable novalidate rely; +alter table web_sales add constraint ws_p foreign key (ws_promo_sk) references promotion (p_promo_sk) disable novalidate rely; +alter table web_sales add constraint ws_s_a foreign key (ws_ship_addr_sk) references customer_address (ca_address_sk) disable novalidate rely; +alter table web_sales add constraint ws_s_cd foreign key (ws_ship_cdemo_sk) references customer_demographics (cd_demo_sk) disable novalidate rely; +alter table web_sales add constraint ws_s_c foreign key (ws_ship_customer_sk) references customer (c_customer_sk) disable novalidate rely; 
+alter table web_sales add constraint ws_s_d foreign key (ws_ship_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_sales add constraint ws_s_hd foreign key (ws_ship_hdemo_sk) references household_demographics (hd_demo_sk) disable novalidate rely; +alter table web_sales add constraint ws_sm foreign key (ws_ship_mode_sk) references ship_mode (sm_ship_mode_sk) disable novalidate rely; +-- partition_col case +alter table web_sales add constraint ws_d2 foreign key (ws_sold_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_sales add constraint ws_t foreign key (ws_sold_time_sk) references time_dim (t_time_sk) disable novalidate rely; +alter table web_sales add constraint ws_w2 foreign key (ws_warehouse_sk) references warehouse (w_warehouse_sk) disable novalidate rely; +alter table web_sales add constraint ws_wp foreign key (ws_web_page_sk) references web_page (wp_web_page_sk) disable novalidate rely; +alter table web_sales add constraint ws_ws foreign key (ws_web_site_sk) references web_site (web_site_sk) disable novalidate rely; +alter table web_site add constraint web_d1 foreign key (web_close_date_sk) references date_dim (d_date_sk) disable novalidate rely; +alter table web_site add constraint web_d2 foreign key (web_open_date_sk) references date_dim (d_date_sk) disable novalidate rely; + +alter table store change column s_store_id s_store_id string constraint strid_nn not null disable novalidate rely; +alter table call_center change column cc_call_center_id cc_call_center_id string constraint ccid_nn not null disable novalidate rely; +alter table catalog_page change column cp_catalog_page_id cp_catalog_page_id string constraint cpid_nn not null disable novalidate rely; +alter table web_site change column web_site_id web_site_id string constraint wsid_nn not null disable novalidate rely; +alter table web_page change column wp_web_page_id wp_web_page_id string constraint wpid_nn not null disable 
novalidate rely; +alter table warehouse change column w_warehouse_id w_warehouse_id string constraint wid_nn not null disable novalidate rely; +alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely; +alter table customer_address change column ca_address_id ca_address_id string constraint caid_nn not null disable novalidate rely; +alter table date_dim change column d_date_id d_date_id string constraint did_nn not null disable novalidate rely; +alter table item change column i_item_id i_item_id string constraint itid_nn not null disable novalidate rely; +alter table promotion change column p_promo_id p_promo_id string constraint pid_nn not null disable novalidate rely; +alter table reason change column r_reason_id r_reason_id string constraint rid_nn not null disable novalidate rely; +alter table ship_mode change column sm_ship_mode_id sm_ship_mode_id string constraint smid_nn not null disable novalidate rely; +alter table time_dim change column t_time_id t_time_id string constraint tid_nn not null disable novalidate rely; diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java index 98ceb21404..18a470fd11 100644 --- a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java +++ b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java @@ -34,7 +34,7 @@ @RunWith(Parameterized.class) public class TestTezPerfCliDriver { - static CliAdapter adapter = new CliConfigs.TezPerfCliConfig().getCliAdapter(); + static CliAdapter adapter = new CliConfigs.TezPerfCliConfig(false).getCliAdapter(); @Parameters(name = "{0}") public static List getParameters() throws Exception { diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfConstraintsCliDriver.java 
b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfConstraintsCliDriver.java new file mode 100644 index 0000000000..5881353387 --- /dev/null +++ b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfConstraintsCliDriver.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.cli; + +import java.io.File; +import java.util.Comparator; +import java.util.List; +import org.apache.hadoop.hive.cli.control.CliAdapter; +import org.apache.hadoop.hive.cli.control.CliConfigs; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestTezPerfConstraintsCliDriver { + + static CliAdapter adapter = new CliConfigs.TezPerfCliConfig(true).getCliAdapter(); + + @Parameters(name = "{0}") + public static List getParameters() throws Exception { + List parameters = adapter.getParameters(); + parameters.sort(new C1()); + return parameters; + } + + static class C1 implements Comparator { + + @Override + public int compare(Object[] o1, Object[] o2) { + return o1[0].toString().compareTo(o2[0].toString()); + } + + } + + @ClassRule + public static TestRule cliClassRule = adapter.buildClassRule(); + + @Rule + public TestRule cliTestRule = adapter.buildTestRule(); + + private String name; + private File qfile; + + public TestTezPerfConstraintsCliDriver(String name, File qfile) { + this.name = name; + this.qfile = qfile; + } + + @Test + public void testCliDriver() throws Exception { + adapter.runTest(name, qfile); + } + +} diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8349e3d84e..a74ba1bd73 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1731,7 +1731,104 @@ spark.only.query.negative.files=spark_job_max_tasks.q,\ spark_submit_negative_executor_memory.q spark.perf.disabled.query.files=query14.q,\ - query64.q + query64.q,\ + cbo_query1.q,\ + cbo_query10.q,\ + cbo_query11.q,\ + cbo_query12.q,\ + cbo_query13.q,\ + cbo_query14.q,\ + 
cbo_query15.q,\ + cbo_query16.q,\ + cbo_query17.q,\ + cbo_query18.q,\ + cbo_query19.q,\ + cbo_query2.q,\ + cbo_query20.q,\ + cbo_query21.q,\ + cbo_query22.q,\ + cbo_query23.q,\ + cbo_query24.q,\ + cbo_query25.q,\ + cbo_query26.q,\ + cbo_query27.q,\ + cbo_query28.q,\ + cbo_query29.q,\ + cbo_query3.q,\ + cbo_query30.q,\ + cbo_query31.q,\ + cbo_query32.q,\ + cbo_query33.q,\ + cbo_query34.q,\ + cbo_query35.q,\ + cbo_query36.q,\ + cbo_query37.q,\ + cbo_query38.q,\ + cbo_query39.q,\ + cbo_query4.q,\ + cbo_query40.q,\ + cbo_query42.q,\ + cbo_query43.q,\ + cbo_query44.q,\ + cbo_query45.q,\ + cbo_query46.q,\ + cbo_query47.q,\ + cbo_query48.q,\ + cbo_query49.q,\ + cbo_query5.q,\ + cbo_query50.q,\ + cbo_query51.q,\ + cbo_query52.q,\ + cbo_query53.q,\ + cbo_query54.q,\ + cbo_query55.q,\ + cbo_query56.q,\ + cbo_query57.q,\ + cbo_query58.q,\ + cbo_query59.q,\ + cbo_query6.q,\ + cbo_query60.q,\ + cbo_query61.q,\ + cbo_query63.q,\ + cbo_query64.q,\ + cbo_query65.q,\ + cbo_query66.q,\ + cbo_query67.q,\ + cbo_query68.q,\ + cbo_query69.q,\ + cbo_query7.q,\ + cbo_query70.q,\ + cbo_query71.q,\ + cbo_query72.q,\ + cbo_query73.q,\ + cbo_query74.q,\ + cbo_query75.q,\ + cbo_query76.q,\ + cbo_query77.q,\ + cbo_query78.q,\ + cbo_query79.q,\ + cbo_query8.q,\ + cbo_query80.q,\ + cbo_query81.q,\ + cbo_query82.q,\ + cbo_query83.q,\ + cbo_query84.q,\ + cbo_query85.q,\ + cbo_query86.q,\ + cbo_query87.q,\ + cbo_query88.q,\ + cbo_query89.q,\ + cbo_query9.q,\ + cbo_query90.q,\ + cbo_query91.q,\ + cbo_query92.q,\ + cbo_query93.q,\ + cbo_query94.q,\ + cbo_query95.q,\ + cbo_query96.q,\ + cbo_query97.q,\ + cbo_query98.q,\ + cbo_query99.q druid.query.files=druidmini_test1.q,\ druidmini_test_ts.q,\ diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 5e1e88e89d..afff0df759 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -280,7 +280,7 @@ public ContribCliConfig() { } public static class TezPerfCliConfig extends AbstractCliConfig { - public TezPerfCliConfig() { + public TezPerfCliConfig(boolean useConstraints) { super(CorePerfCliDriver.class); try { setQueryDir("ql/src/test/queries/clientpositive/perf"); @@ -290,10 +290,21 @@ public TezPerfCliConfig() { excludesFrom(testConfigProps, "encrypted.query.files"); excludesFrom(testConfigProps, "erasurecoding.only.query.files"); - setResultsDir("ql/src/test/results/clientpositive/perf/tez"); + excludeQuery("cbo_query44.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query45.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query67.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query70.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query86.q"); // TODO: Enable when we move to Calcite 1.18 + setLogDir("itests/qtest/target/qfile-results/clientpositive/tez"); - setInitScript("q_perf_test_init.sql"); + if (useConstraints) { + setInitScript("q_perf_test_init_constraints.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez/constraints"); + } else { + setInitScript("q_perf_test_init.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez"); + } setCleanupScript("q_perf_test_cleanup.sql"); setHiveConfDir("data/conf/perf-reg/tez"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index b4d5806d4e..aabc34dad9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -105,6 +105,7 @@ protected String cboInfo; protected boolean cboSucceeded; protected String optimizedSql; + protected String calcitePlan; protected String cmd = ""; private TokenRewriteStream tokenRewriteStream; // Holds the qualified name to tokenRewriteStream 
for the views @@ -1021,6 +1022,14 @@ public void setCboSucceeded(boolean cboSucceeded) { this.cboSucceeded = cboSucceeded; } + public String getCalcitePlan() { + return this.calcitePlan; + } + + public void setCalcitePlan(String calcitePlan) { + this.calcitePlan = calcitePlan; + } + public Table getMaterializedTable(String cteName) { return cteTables.get(cteName); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 46bf088f2c..4cc5fa8a2e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -140,6 +140,16 @@ static JSONObject getJSONDependencies(ExplainWork work) return outJSONObject; } + public String outputCboPlan(String cboPlan, PrintStream out, boolean jsonOutput) + throws JSONException { + if (out != null) { + out.println("CBO PLAN:"); + out.println(cboPlan); + } + + return jsonOutput ? cboPlan : null; + } + public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws Exception { isLogical = true; @@ -385,7 +395,11 @@ public int execute(DriverContext driverContext) { OutputStream outS = resFile.getFileSystem(conf).create(resFile); out = new PrintStream(outS); - if (work.isLogical()) { + if (work.isCbo()) { + if (work.getCboPlan() != null) { + outputCboPlan(work.getCboPlan(), out, work.isFormatted()); + } + } else if (work.isLogical()) { JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work); if (work.isFormatted()) { out.print(jsonLogicalPlan); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java index 92fcfec673..8b10823b37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java @@ -268,7 +268,8 @@ public void run() { null,// analyzer config, //explainConfig null, // cboInfo - plan.getOptimizedQueryString() // 
optimizedSQL + plan.getOptimizedQueryString(), // optimizedSQL + null ); @SuppressWarnings("unchecked") ExplainTask explain = (ExplainTask) TaskFactory.get(work); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 0af30d48f3..5a613b82ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -495,7 +495,8 @@ private JSONObject getExplainPlan(QueryPlan plan, HiveConf conf, HookContext hoo null, null, // analyzer config, // explainConfig plan.getCboInfo(), // cboInfo, - plan.getOptimizedQueryString() + plan.getOptimizedQueryString(), + null ); ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf); explain.initialize(hookContext.getQueryState(), plan, null, null); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index dc0a84b37d..9aa30129b6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -18,20 +18,28 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; import com.google.common.collect.ImmutableList; +import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelReferentialConstraint; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Aggregate.Group; 
import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.Sort; @@ -45,12 +53,15 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexTableInputRef; +import org.apache.calcite.rex.RexTableInputRef.RelTableRef; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -524,4 +535,231 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper // It passed all the tests return false; } + + public static Pair> isRewritablePKFKJoin(RelBuilder builder, Join join, + boolean leftInputPotentialFK, RelMetadataQuery mq) { + final JoinRelType joinType = join.getJoinType(); + final RexNode cond = join.getCondition(); + final RelNode fkInput = leftInputPotentialFK ? join.getLeft() : join.getRight(); + final RelNode nonFkInput = leftInputPotentialFK ? 
join.getRight() : join.getLeft(); + final Pair> nonRewritable = Pair.of(false, null); + + if (joinType != JoinRelType.INNER) { + // If it is not an inner, we transform it as the metadata + // providers for expressions do not pull information through + // outer join (as it would not be correct) + join = (Join) builder + .push(join.getLeft()).push(join.getRight()) + .join(JoinRelType.INNER, cond) + .build(); + } + + // 1) Check whether there is any filtering condition on the + // non-FK side. Basically we check whether the operators + // below altered the PK cardinality in any way + if (HiveRelOptUtil.isRowFilteringPlan(mq, nonFkInput)) { + return nonRewritable; + } + + // 2) Check whether there is an FK relationship + final Map refToRex = new HashMap<>(); + final EquivalenceClasses ec = new EquivalenceClasses(); + for (RexNode conj : RelOptUtil.conjunctions(cond)) { + if (!conj.isA(SqlKind.EQUALS)) { + // Not an equality, we bail out + return nonRewritable; + } + RexCall equiCond = (RexCall) conj; + RexNode eqOp1 = equiCond.getOperands().get(0); + Set eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); + if (eqOp1ExprsLineage == null) { + // Cannot be mapped, bail out + return nonRewritable; + } + RexNode eqOp2 = equiCond.getOperands().get(1); + Set eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); + if (eqOp2ExprsLineage == null) { + // Cannot be mapped, bail out + return nonRewritable; + } + List eqOp2ExprsFiltered = null; + for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { + RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); + if (inputRef1 == null) { + // Bail out as this condition could not be mapped into an input reference + return nonRewritable; + } + refToRex.put(inputRef1, eqOp1); + if (eqOp2ExprsFiltered == null) { + // First iteration + eqOp2ExprsFiltered = new ArrayList<>(); + for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { + RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); + if (inputRef2 == null) 
{ + // Bail out as this condition could not be mapped into an input reference + return nonRewritable; + } + // Add to list of expressions for follow-up iterations + eqOp2ExprsFiltered.add(inputRef2); + // Add to equivalence classes and backwards mapping + ec.addEquivalenceClass(inputRef1, inputRef2); + refToRex.put(inputRef2, eqOp2); + } + } else { + // Rest of iterations, only adding, no checking + for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { + ec.addEquivalenceClass(inputRef1, inputRef2); + } + } + } + } + if (ec.getEquivalenceClassesMap().isEmpty()) { + // This may be a cartesian product, we bail out + return nonRewritable; + } + + // 3) Gather all tables from the FK side and the table from the + // non-FK side + final Set leftTables = mq.getTableReferences(join.getLeft()); + final Set rightTables = + Sets.difference(mq.getTableReferences(join), mq.getTableReferences(join.getLeft())); + final Set fkTables = join.getLeft() == fkInput ? leftTables : rightTables; + final Set nonFkTables = join.getLeft() == fkInput ? rightTables : leftTables; + assert nonFkTables.size() == 1; + final RelTableRef nonFkTable = nonFkTables.iterator().next(); + final List nonFkTableQName = nonFkTable.getQualifiedName(); + + // 4) For each table, check whether there is a match on the non-FK side. 
+ // If there is one and it is the only condition, we are ready to transform + boolean canBeRewritten = false; + List nullableNodes = null; + for (RelTableRef tRef : fkTables) { + List constraints = tRef.getTable().getReferentialConstraints(); + for (RelReferentialConstraint constraint : constraints) { + if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { + nullableNodes = new ArrayList<>(); + EquivalenceClasses ecT = EquivalenceClasses.copy(ec); + boolean allContained = true; + for (int pos = 0; pos < constraint.getNumColumns(); pos++) { + int foreignKeyPos = constraint.getColumnPairs().get(pos).source; + RelDataType foreignKeyColumnType = + tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); + RexTableInputRef foreignKeyColumnRef = + RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); + int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; + RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, + nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); + if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { + if (foreignKeyColumnType.isNullable()) { + if (joinType == JoinRelType.INNER) { + // If it is nullable and it is an INNER, we just need an IS NOT NULL filter + RexNode originalCondOp = refToRex.get(foreignKeyColumnRef); + assert originalCondOp != null; + nullableNodes.add(originalCondOp); + } else { + // If it is nullable and this is not an INNER, we cannot execute any transformation + allContained = false; + break; + } + } + // Remove this condition from eq classes as we have checked that it is present + // in the join condition + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); + if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self + 
ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); + } + ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); + if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self + ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); + } + } else { + // No relationship, we cannot do anything + allContained = false; + break; + } + } + if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) { + // We made it + canBeRewritten = true; + break; + } + } + } + } + + return Pair.of(canBeRewritten, nullableNodes); + } + + private static RexTableInputRef extractTableInputRef(RexNode node) { + RexTableInputRef ref = null; + if (node instanceof RexTableInputRef) { + ref = (RexTableInputRef) node; + } else if (RexUtil.isLosslessCast(node) && + ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) { + ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0); + } + return ref; + } + + /** + * Class representing an equivalence class, i.e., a set of equivalent columns + * + * TODO: This is a subset of a private class in materialized view rewriting + * in Calcite. It should be moved to its own class in Calcite so it can be + * accessible here. 
+ */ + private static class EquivalenceClasses { + + private final Map> nodeToEquivalenceClass; + + protected EquivalenceClasses() { + nodeToEquivalenceClass = new HashMap<>(); + } + + protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { + Set c1 = nodeToEquivalenceClass.get(p1); + Set c2 = nodeToEquivalenceClass.get(p2); + if (c1 != null && c2 != null) { + // Both present, we need to merge + if (c1.size() < c2.size()) { + // We swap them to merge + Set c2Temp = c2; + c2 = c1; + c1 = c2Temp; + } + for (RexTableInputRef newRef : c2) { + c1.add(newRef); + nodeToEquivalenceClass.put(newRef, c1); + } + } else if (c1 != null) { + // p1 present, we need to merge into it + c1.add(p2); + nodeToEquivalenceClass.put(p2, c1); + } else if (c2 != null) { + // p2 present, we need to merge into it + c2.add(p1); + nodeToEquivalenceClass.put(p1, c2); + } else { + // None are present, add to same equivalence class + Set equivalenceClass = new LinkedHashSet<>(); + equivalenceClass.add(p1); + equivalenceClass.add(p2); + nodeToEquivalenceClass.put(p1, equivalenceClass); + nodeToEquivalenceClass.put(p2, equivalenceClass); + } + } + + protected Map> getEquivalenceClassesMap() { + return nodeToEquivalenceClass; + } + + protected static EquivalenceClasses copy(EquivalenceClasses ec) { + final EquivalenceClasses newEc = new EquivalenceClasses(); + for (Entry> e : ec.nodeToEquivalenceClass.entrySet()) { + newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); + } + return newEc; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index 0a307f248a..534a5c9531 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -53,6 +53,7 @@ import 
org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; @@ -105,7 +106,6 @@ public void onMatch(RelOptRuleCall call) { // If it is a left outer, left will be the FK side. // If it is a right outer, right will be the FK side. final RelNode fkInput; - final RelNode nonFkInput; final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null); final ImmutableBitSet leftBits = @@ -204,17 +204,14 @@ public void onMatch(RelOptRuleCall call) { return; } fkInput = leftInputPotentialFK ? leftInput : rightInput; - nonFkInput = leftInputPotentialFK ? rightInput : leftInput; mode = Mode.REMOVE; break; case LEFT: fkInput = leftInput; - nonFkInput = rightInput; mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM; break; case RIGHT: fkInput = rightInput; - nonFkInput = leftInput; mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM; break; default: @@ -222,151 +219,13 @@ public void onMatch(RelOptRuleCall call) { return; } - // 2) Check whether there is any filtering condition on the - // non-FK side. 
Basically we check whether the operators - // below altered the PK cardinality in any way - final RelMetadataQuery mq = call.getMetadataQuery(); - if (HiveRelOptUtil.isRowFilteringPlan(mq, nonFkInput)) { - return; - } - - // 3) Check whether there is an FK relationship - if (join.getJoinType() != JoinRelType.INNER) { - // If it is not an inner, we transform it as the metadata - // providers for expressions do not pull information through - // outer join (as it would not be correct) - join = (Join) call.builder() - .push(leftInput).push(rightInput) - .join(JoinRelType.INNER, cond) - .build(); - } - final Map refToRex = new HashMap<>(); - final EquivalenceClasses ec = new EquivalenceClasses(); - for (RexNode conj : RelOptUtil.conjunctions(cond)) { - if (!conj.isA(SqlKind.EQUALS)) { - // Not an equality, we bail out - return; - } - RexCall equiCond = (RexCall) conj; - RexNode eqOp1 = equiCond.getOperands().get(0); - Set eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); - if (eqOp1ExprsLineage == null) { - // Cannot be mapped, bail out - return; - } - RexNode eqOp2 = equiCond.getOperands().get(1); - Set eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); - if (eqOp2ExprsLineage == null) { - // Cannot be mapped, bail out - return; - } - List eqOp2ExprsFiltered = null; - for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { - RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); - if (inputRef1 == null) { - // Bail out as this condition could not be map into an input reference - return; - } - refToRex.put(inputRef1, eqOp1); - if (eqOp2ExprsFiltered == null) { - // First iteration - eqOp2ExprsFiltered = new ArrayList<>(); - for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { - RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); - if (inputRef2 == null) { - // Bail out as this condition could not be map into an input reference - return; - } - // Add to list of expressions for follow-up iterations - 
eqOp2ExprsFiltered.add(inputRef2); - // Add to equivalence classes and backwards mapping - ec.addEquivalenceClass(inputRef1, inputRef2); - refToRex.put(inputRef2, eqOp2); - } - } else { - // Rest of iterations, only adding, no checking - for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { - ec.addEquivalenceClass(inputRef1, inputRef2); - } - } - } - } - if (ec.getEquivalenceClassesMap().isEmpty()) { - // This may be a cartesian product, we bail out - return; - } - - // 4) Gather all tables from the FK side and the table from the - // non-FK side - final Set leftTables = mq.getTableReferences(leftInput); - final Set rightTables = - Sets.difference(mq.getTableReferences(join), mq.getTableReferences(leftInput)); - final Set fkTables = leftInputPotentialFK ? leftTables : rightTables; - final Set nonFkTables = leftInputPotentialFK ? rightTables : leftTables; - assert nonFkTables.size() == 1; - final RelTableRef nonFkTable = nonFkTables.iterator().next(); - final List nonFkTableQName = nonFkTable.getQualifiedName(); + // 2) Check whether this join can be rewritten or removed + Pair> r = HiveRelOptUtil.isRewritablePKFKJoin(call.builder(), + join, leftInput == fkInput, call.getMetadataQuery()); - // 5) For each table, check whether there is a matching on the non-FK side. 
- // If there is and it is the only condition, we are ready to transform - boolean canBeRewritten = false; - List nullableNodes = new ArrayList<>(); - for (RelTableRef tRef : fkTables) { - List constraints = tRef.getTable().getReferentialConstraints(); - for (RelReferentialConstraint constraint : constraints) { - if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { - EquivalenceClasses ecT = EquivalenceClasses.copy(ec); - boolean allContained = true; - for (int pos = 0; pos < constraint.getNumColumns(); pos++) { - int foreignKeyPos = constraint.getColumnPairs().get(pos).source; - RelDataType foreignKeyColumnType = - tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); - RexTableInputRef foreignKeyColumnRef = - RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); - if (foreignKeyColumnType.isNullable()) { - if (joinType == JoinRelType.INNER) { - // If it is nullable and it is an INNER, we just need a IS NOT NULL filter - RexNode originalCondOp = refToRex.get(foreignKeyColumnRef); - assert originalCondOp != null; - nullableNodes.add(originalCondOp); - } else { - // If it is nullable and this is not an INNER, we cannot execute any transformation - allContained = false; - break; - } - } - int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; - RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, - nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); - if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { - // Remove this condition from eq classes as we have checked that it is present - // in the join condition - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); - } - 
ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); - } - } else { - // No relationship, we cannot do anything - allContained = false; - break; - } - } - if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) { - // We made it - canBeRewritten = true; - break; - } - } - } - } - - // 6) If it is the only condition, we can trigger the rewriting - if (canBeRewritten) { + // 3) If it is the only condition, we can trigger the rewriting + if (r.left) { + List nullableNodes = r.right; // If we reach here, we trigger the transform if (mode == Mode.REMOVE) { if (rightInputPotentialFK) { @@ -410,84 +269,13 @@ public void onMatch(RelOptRuleCall call) { call.transformTo(call.builder() .push(leftInput).push(rightInput) .join(JoinRelType.INNER, join.getCondition()) + .convert(call.rel(1).getRowType(), false) // Preserve nullability .project(project.getChildExps()) .build()); } } } - private static RexTableInputRef extractTableInputRef(RexNode node) { - RexTableInputRef ref = null; - if (node instanceof RexTableInputRef) { - ref = (RexTableInputRef) node; - } else if (RexUtil.isLosslessCast(node) && - ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) { - ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0); - } - return ref; - } - - /** - * Class representing an equivalence class, i.e., a set of equivalent columns - * - * TODO: This is a subset of a private class in materialized view rewriting - * in Calcite. It should be moved to its own class in Calcite so it can be - * accessible here. 
- */ - private static class EquivalenceClasses { - - private final Map> nodeToEquivalenceClass; - - protected EquivalenceClasses() { - nodeToEquivalenceClass = new HashMap<>(); - } - - protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { - Set c1 = nodeToEquivalenceClass.get(p1); - Set c2 = nodeToEquivalenceClass.get(p2); - if (c1 != null && c2 != null) { - // Both present, we need to merge - if (c1.size() < c2.size()) { - // We swap them to merge - Set c2Temp = c2; - c2 = c1; - c1 = c2Temp; - } - for (RexTableInputRef newRef : c2) { - c1.add(newRef); - nodeToEquivalenceClass.put(newRef, c1); - } - } else if (c1 != null) { - // p1 present, we need to merge into it - c1.add(p2); - nodeToEquivalenceClass.put(p2, c1); - } else if (c2 != null) { - // p2 present, we need to merge into it - c2.add(p1); - nodeToEquivalenceClass.put(p1, c2); - } else { - // None are present, add to same equivalence class - Set equivalenceClass = new LinkedHashSet<>(); - equivalenceClass.add(p1); - equivalenceClass.add(p2); - nodeToEquivalenceClass.put(p1, equivalenceClass); - nodeToEquivalenceClass.put(p2, equivalenceClass); - } - } - - protected Map> getEquivalenceClassesMap() { - return nodeToEquivalenceClass; - } - - protected static EquivalenceClasses copy(EquivalenceClasses ec) { - final EquivalenceClasses newEc = new EquivalenceClasses(); - for (Entry> e : ec.nodeToEquivalenceClass.entrySet()) { - newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); - } - return newEc; - } - } - private enum Mode { // Removes join operator from the plan REMOVE, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 22f3266c87..3bf9ab415a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -528,7 +528,15 @@ Operator genOPTree(ASTNode ast, PlannerContext 
plannerCtx) throws SemanticExcept this.ctx.setCboSucceeded(true); if (this.ctx.isExplainPlan()) { ExplainConfiguration explainConfig = this.ctx.getExplainConfig(); - if (explainConfig.isExtended() || explainConfig.isFormatted()) { + if (explainConfig.isCbo()) { + if (explainConfig.isCboExtended()) { + // Include join cost + this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + } else { + // Do not include join cost + this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan)); + } + } else if (explainConfig.isExtended() || explainConfig.isFormatted()) { this.ctx.setOptimizedSql(getOptimizedSql(newPlan)); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index a92502e746..28a7b43f18 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -41,6 +41,8 @@ private boolean extended = false; private boolean formatted = false; private boolean dependency = false; + private boolean cbo = false; + private boolean cboExtended = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; @@ -84,6 +86,22 @@ public void setDependency(boolean dependency) { this.dependency = dependency; } + public boolean isCbo() { + return cbo; + } + + public void setCbo(boolean cbo) { + this.cbo = cbo; + } + + public boolean isCboExtended() { + return cboExtended; + } + + public void setCboExtended(boolean cboExtended) { + this.cboExtended = cboExtended; + } + public boolean isLogical() { return logical; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 49b614634f..6721a3761c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -75,6 +75,14 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { config.setExtended(true); } else if (explainOptions == HiveParser.KW_DEPENDENCY) { config.setDependency(true); + } else if (explainOptions == HiveParser.KW_CBO) { + config.setCbo(true); + if (i + 1 < childCount) { + if (ast.getChild(i + 1).getType() == HiveParser.KW_EXTENDED) { + config.setCboExtended(true); + i++; + } + } } else if (explainOptions == HiveParser.KW_LOGICAL) { config.setLogical(true); } else if (explainOptions == HiveParser.KW_AUTHORIZATION) { @@ -191,6 +199,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency() + && !config.isCbo() && !config.isLogical() && !config.isAuthorize() && ( @@ -216,7 +225,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { sem, config, ctx.getCboInfo(), - ctx.getOptimizedSql()); + ctx.getOptimizedSql(), + ctx.getCalcitePlan()); work.setAppendTaskType( HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 8bf9cc0ad6..253633cfb3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -193,6 +193,7 @@ KW_DEBUG: 'DEBUG'; KW_FORMATTED: 'FORMATTED'; KW_DEPENDENCY: 'DEPENDENCY'; KW_LOGICAL: 'LOGICAL'; +KW_CBO: 'CBO'; KW_SERDE: 'SERDE'; KW_WITH: 'WITH'; KW_DEFERRED: 'DEFERRED'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index bc95c46d24..7dda8b3cfe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -805,6 +805,7 @@ explainOption : 
KW_EXTENDED | KW_FORMATTED | KW_DEPENDENCY + | KW_CBO KW_EXTENDED? | KW_LOGICAL | KW_AUTHORIZATION | KW_ANALYZE diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index fa033d74e6..417955cde7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -794,7 +794,7 @@ principalIdentifier nonReserved : KW_ABORT | KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | KW_BEFORE | KW_BUCKET | KW_BUCKETS - | KW_CASCADE | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS + | KW_CASCADE | KW_CBO | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_DATA | KW_DAY | KW_DATABASES | KW_DATETIME | KW_DBPROPERTIES | KW_DEFERRED | KW_DEFINED | KW_DELIMITED | KW_DEPENDENCY | KW_DESC | KW_DIRECTORIES | KW_DIRECTORY | KW_DISABLE | KW_DISTRIBUTE | KW_DOW | KW_ELEM_TYPE diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 01da4d558d..8a60d59d16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -54,6 +54,7 @@ boolean appendTaskType; String cboInfo; + String cboPlan; private String optimizedSQL; @@ -70,7 +71,8 @@ public ExplainWork(Path resFile, BaseSemanticAnalyzer analyzer, ExplainConfiguration config, String cboInfo, - String optimizedSQL) { + String optimizedSQL, + String cboPlan) { this.resFile = resFile; this.rootTasks = new ArrayList>(rootTasks); this.fetchTask = fetchTask; @@ -87,6 +89,7 @@ public ExplainWork(Path resFile, this.pCtx = pCtx; this.cboInfo = cboInfo; this.optimizedSQL = optimizedSQL; + this.cboPlan = cboPlan; 
this.config = config; } @@ -177,6 +180,10 @@ public void setParseContext(ParseContext pCtx) { this.pCtx = pCtx; } + public boolean isCbo() { + return config.isCbo(); + } + public boolean isLogical() { return config.isLogical(); } @@ -217,6 +224,14 @@ public void setOptimizedSQL(String optimizedSQL) { this.optimizedSQL = optimizedSQL; } + public String getCboPlan() { + return cboPlan; + } + + public void setCboPlan(String cboPlan) { + this.cboPlan = cboPlan; + } + public ExplainConfiguration getConfig() { return config; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java index 932f4e850b..f449c6b408 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -300,7 +300,7 @@ private String explain(SemanticAnalyzer sem, QueryPlan plan) throws ExplainConfiguration config = new ExplainConfiguration(); config.setExtended(true); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString()); + sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(queryState, plan, null, null); diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query1.q b/ql/src/test/queries/clientpositive/perf/cbo_query1.q new file mode 100644 index 0000000000..7cb0cd20b7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query1.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query1.tpl and seed 2031708268 +explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim 
+where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100; + +-- end query 1 in stream 0 using template query1.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query10.q b/ql/src/test/queries/clientpositive/perf/cbo_query10.q new file mode 100644 index 0000000000..fbdc9db6dd --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query10.q @@ -0,0 +1,61 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query10.tpl and seed 797269820 +explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + 
cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100; + +-- end query 1 in stream 0 using template query10.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query11.q b/ql/src/test/queries/clientpositive/perf/cbo_query11.q new file mode 100644 index 0000000000..09d9529d71 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query11.q @@ -0,0 +1,77 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query11.tpl and seed 1819994127 +explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year 
+ ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100; + +-- end query 1 in stream 0 using template query11.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query12.q b/ql/src/test/queries/clientpositive/perf/cbo_query12.q new file mode 100644 index 0000000000..41029cfa59 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query12.q @@ -0,0 +1,35 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query12.tpl and seed 345591136 +explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + 
,i_item_desc + ,revenueratio +limit 100; + +-- end query 1 in stream 0 using template query12.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query13.q b/ql/src/test/queries/clientpositive/perf/cbo_query13.q new file mode 100644 index 0000000000..72eb08d499 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query13.q @@ -0,0 +1,54 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query13.tpl and seed 622697896 +explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +; + +-- end query 1 in stream 0 using template query13.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query14.q 
b/ql/src/test/queries/clientpositive/perf/cbo_query14.q new file mode 100644 index 0000000000..eaee914c8d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query14.q @@ -0,0 +1,104 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query14.tpl and seed 1819994127 +explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , 
count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query15.q b/ql/src/test/queries/clientpositive/perf/cbo_query15.q new file mode 100644 index 0000000000..3beea4ea13 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query15.q @@ -0,0 +1,22 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query15.tpl and seed 1819994127 +explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( 
substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100; + +-- end query 1 in stream 0 using template query15.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query16.q b/ql/src/test/queries/clientpositive/perf/cbo_query16.q new file mode 100644 index 0000000000..74245aa10b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query16.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query16.tpl and seed 171719422 +explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100; + +-- end query 1 in stream 0 using template query16.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query17.q b/ql/src/test/queries/clientpositive/perf/cbo_query17.q new file mode 100644 index 0000000000..5bf98641ee --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query17.q @@ -0,0 +1,47 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query17.tpl 
and seed 1819994127 +explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100; + +-- end query 1 in stream 0 using template query17.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query18.q b/ql/src/test/queries/clientpositive/perf/cbo_query18.q new file mode 100644 index 0000000000..110bee5a3f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query18.q @@ -0,0 +1,36 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query18.tpl and seed 1978355063 +explain cbo 
+select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100; + +-- end query 1 in stream 0 using template query18.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query19.q b/ql/src/test/queries/clientpositive/perf/cbo_query19.q new file mode 100644 index 0000000000..abcec3601f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query19.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query19.tpl and seed 1930872976 +explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + 
,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 ; + +-- end query 1 in stream 0 using template query19.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query2.q b/ql/src/test/queries/clientpositive/perf/cbo_query2.q new file mode 100644 index 0000000000..9fcccbf4e4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query2.q @@ -0,0 +1,62 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query2.tpl and seed 1819994127 +explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 
+ from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1; + +-- end query 1 in stream 0 using template query2.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query20.q b/ql/src/test/queries/clientpositive/perf/cbo_query20.q new file mode 100644 index 0000000000..1d361b8b36 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query20.q @@ -0,0 +1,31 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query20.tpl and seed 345591136 +explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100; + +-- end query 1 in stream 0 using template query20.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query21.q b/ql/src/test/queries/clientpositive/perf/cbo_query21.q new file mode 100644 index 0000000000..90daea02c3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query21.q @@ -0,0 +1,32 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query21.tpl and seed 1819994127 +explain cbo +select * + 
from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100; + +-- end query 1 in stream 0 using template query21.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query22.q b/ql/src/test/queries/clientpositive/perf/cbo_query22.q new file mode 100644 index 0000000000..14ceec4174 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query22.q @@ -0,0 +1,24 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query22.tpl and seed 1819994127 +explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100; + +-- end query 1 in stream 0 using template query22.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query23.q b/ql/src/test/queries/clientpositive/perf/cbo_query23.q new file mode 100644 index 0000000000..a1c661b086 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query23.q @@ -0,0 +1,52 @@ 
+set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query23.tpl and seed 2031708268 +explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q new file mode 100644 index 0000000000..02bcbafb7e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q @@ -0,0 +1,51 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using 
template query24.tpl and seed 1220860970 +explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +; diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query25.q b/ql/src/test/queries/clientpositive/perf/cbo_query25.q new file mode 100644 index 0000000000..9611e28f83 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query25.q @@ -0,0 +1,50 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query25.tpl and seed 1819994127 +explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and 
d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; + +-- end query 1 in stream 0 using template query25.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query26.q b/ql/src/test/queries/clientpositive/perf/cbo_query26.q new file mode 100644 index 0000000000..8b874c85bc --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query26.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query26.tpl and seed 1930872976 +explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100; + +-- end query 1 in stream 0 using template query26.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query27.q b/ql/src/test/queries/clientpositive/perf/cbo_query27.q new file mode 100644 index 0000000000..48eaad5b54 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query27.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query27.tpl and seed 2017787633 +explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + 
where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; + +-- end query 1 in stream 0 using template query27.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query28.q b/ql/src/test/queries/clientpositive/perf/cbo_query28.q new file mode 100644 index 0000000000..ad9dacd65e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query28.q @@ -0,0 +1,59 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.metadataonly=true; + +-- start query 1 in stream 0 using template query28.tpl and seed 444293455 +explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt 
between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100; + +-- end query 1 in stream 0 using template query28.tpl + +set hive.optimize.metadataonly=false; diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query29.q b/ql/src/test/queries/clientpositive/perf/cbo_query29.q new file mode 100644 index 0000000000..ea9ec12d33 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query29.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query29.tpl and seed 2031708268 +explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in 
(1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; + +-- end query 1 in stream 0 using template query29.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query3.q b/ql/src/test/queries/clientpositive/perf/cbo_query3.q new file mode 100644 index 0000000000..b1bc55bfdf --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query3.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query3.tpl and seed 2031708268 +explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100; + +-- end query 1 in stream 0 using template query3.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query30.q b/ql/src/test/queries/clientpositive/perf/cbo_query30.q new file mode 100644 index 0000000000..ece63700c0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query30.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query30.tpl and seed 1819994127 +explain cbo +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + 
,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100; + +-- end query 1 in stream 0 using template query30.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query31.q b/ql/src/test/queries/clientpositive/perf/cbo_query31.q new file mode 100644 index 0000000000..a2babbf23d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query31.q @@ -0,0 +1,54 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query31.tpl and seed 1819994127 +explain cbo +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and 
ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year; + +-- end query 1 in stream 0 using template query31.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query32.q b/ql/src/test/queries/clientpositive/perf/cbo_query32.q new file mode 100644 index 0000000000..4f3c7291a8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query32.q @@ -0,0 +1,30 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query32.tpl and seed 2031708268 +explain cbo +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100; + +-- end query 1 in stream 0 using template query32.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query33.q b/ql/src/test/queries/clientpositive/perf/cbo_query33.q new file mode 100644 index 0000000000..fc808aeddb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query33.q @@ -0,0 +1,77 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template 
query33.tpl and seed 1930872976 +explain cbo +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100; + +-- end query 1 in stream 0 using template query33.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query34.q b/ql/src/test/queries/clientpositive/perf/cbo_query34.q new file mode 100644 index 0000000000..2b93a6144c --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query34.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query34.tpl and seed 1971067816 +explain cbo +select c_last_name + ,c_first_name + ,c_salutation + 
,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc; + +-- end query 1 in stream 0 using template query34.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query35.q b/ql/src/test/queries/clientpositive/perf/cbo_query35.q new file mode 100644 index 0000000000..9e877806b2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query35.q @@ -0,0 +1,59 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query35.tpl and seed 1930872976 +explain cbo +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + 
sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100; + +-- end query 1 in stream 0 using template query35.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query36.q b/ql/src/test/queries/clientpositive/perf/cbo_query36.q new file mode 100644 index 0000000000..cab89deeb1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query36.q @@ -0,0 +1,32 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query36.tpl and seed 1544728811 +explain cbo +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by 
rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100; + +-- end query 1 in stream 0 using template query36.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query37.q b/ql/src/test/queries/clientpositive/perf/cbo_query37.q new file mode 100644 index 0000000000..f258dc0aa0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query37.q @@ -0,0 +1,19 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query37.tpl and seed 301843662 +explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100; + +-- end query 1 in stream 0 using template query37.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query38.q b/ql/src/test/queries/clientpositive/perf/cbo_query38.q new file mode 100644 index 0000000000..55beed22ce --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query38.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query38.tpl and seed 1819994127 +explain cbo +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and 
catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100; + +-- end query 1 in stream 0 using template query38.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query39.q b/ql/src/test/queries/clientpositive/perf/cbo_query39.q new file mode 100644 index 0000000000..1970c05c9b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query39.q @@ -0,0 +1,56 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query39.tpl and seed 1327317894 +explain cbo +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select 
w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; + +-- end query 1 in stream 0 using template query39.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query4.q b/ql/src/test/queries/clientpositive/perf/cbo_query4.q new file mode 100644 index 0000000000..b47d174331 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query4.q @@ -0,0 +1,111 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query4.tpl and seed 1819994127 +explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select 
c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' 
+ and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100; + +-- end query 1 in stream 0 using template query4.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query40.q b/ql/src/test/queries/clientpositive/perf/cbo_query40.q new file mode 100644 index 0000000000..b54ae4b39e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query40.q @@ -0,0 +1,30 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query40.tpl and seed 1819994127 +explain cbo +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast 
('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100; + +-- end query 1 in stream 0 using template query40.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query42.q b/ql/src/test/queries/clientpositive/perf/cbo_query42.q new file mode 100644 index 0000000000..43fb4ee625 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query42.q @@ -0,0 +1,24 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query42.tpl and seed 1819994127 +explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 ; + +-- end query 1 in stream 0 using template query42.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query43.q b/ql/src/test/queries/clientpositive/perf/cbo_query43.q new file mode 100644 index 0000000000..e4aff114be --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query43.q @@ -0,0 +1,21 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query43.tpl and seed 1819994127 +explain cbo +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') 
then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100; + +-- end query 1 in stream 0 using template query43.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query44.q b/ql/src/test/queries/clientpositive/perf/cbo_query44.q new file mode 100644 index 0000000000..814a721330 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query44.q @@ -0,0 +1,37 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query44.tpl and seed 1819994127 +explain cbo +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100; + +-- end query 1 in stream 0 using 
template query44.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query45.q b/ql/src/test/queries/clientpositive/perf/cbo_query45.q new file mode 100644 index 0000000000..e1b4348b7a --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query45.q @@ -0,0 +1,22 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query45.tpl and seed 2031708268 +explain cbo +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100; + +-- end query 1 in stream 0 using template query45.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query46.q b/ql/src/test/queries/clientpositive/perf/cbo_query46.q new file mode 100644 index 0000000000..6a7a94ee6f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query46.q @@ -0,0 +1,37 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query46.tpl and seed 803547492 +explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and 
(household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100; + +-- end query 1 in stream 0 using template query46.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query47.q b/ql/src/test/queries/clientpositive/perf/cbo_query47.q new file mode 100644 index 0000000000..7348e4b6e7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query47.q @@ -0,0 +1,53 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query47.tpl and seed 2031708268 +explain cbo +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + 
v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100; + +-- end query 1 in stream 0 using template query47.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query48.q b/ql/src/test/queries/clientpositive/perf/cbo_query48.q new file mode 100644 index 0000000000..21db45fc31 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query48.q @@ -0,0 +1,69 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query48.tpl and seed 622697896 +explain cbo +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 
3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +; + +-- end query 1 in stream 0 using template query48.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query49.q b/ql/src/test/queries/clientpositive/perf/cbo_query49.q new file mode 100644 index 0000000000..fccf25d000 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query49.q @@ -0,0 +1,129 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query49.tpl and seed 1819994127 +explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + 
,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100; + +-- end query 1 in stream 0 using template query49.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query5.q b/ql/src/test/queries/clientpositive/perf/cbo_query5.q new 
file mode 100644 index 0000000000..405b2da571 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query5.q @@ -0,0 +1,130 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query5.tpl and seed 1819994127 +explain cbo +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select 
web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100; + +-- end query 1 in stream 0 using template query5.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query50.q b/ql/src/test/queries/clientpositive/perf/cbo_query50.q new file mode 100644 index 0000000000..1fb8051080 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query50.q @@ -0,0 +1,61 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query50.tpl and seed 
1819994127 +explain cbo +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100; + +-- end query 1 in stream 0 using template query50.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query51.q b/ql/src/test/queries/clientpositive/perf/cbo_query51.q new file mode 100644 index 0000000000..16ba023f49 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query51.q @@ -0,0 +1,47 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query51.tpl and seed 1819994127 +explain cbo +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + 
over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100; + +-- end query 1 in stream 0 using template query51.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query52.q b/ql/src/test/queries/clientpositive/perf/cbo_query52.q new file mode 100644 index 0000000000..222550240b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query52.q @@ -0,0 +1,24 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query52.tpl and seed 1819994127 +explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where 
dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 ; + +-- end query 1 in stream 0 using template query52.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query53.q b/ql/src/test/queries/clientpositive/perf/cbo_query53.q new file mode 100644 index 0000000000..c0edd67c95 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query53.q @@ -0,0 +1,30 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query53.tpl and seed 1819994127 +explain cbo +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100; + +-- end query 1 in stream 0 using template query53.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query54.q b/ql/src/test/queries/clientpositive/perf/cbo_query54.q new file mode 100644 index 
0000000000..1e626c155d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query54.q @@ -0,0 +1,58 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query54.tpl and seed 1930872976 +explain cbo +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100; + +-- end query 1 in stream 0 using template query54.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query55.q b/ql/src/test/queries/clientpositive/perf/cbo_query55.q new file mode 100644 index 0000000000..55176b2e33 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query55.q @@ -0,0 +1,16 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using 
template query55.tpl and seed 2031708268 +explain cbo +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 ; + +-- end query 1 in stream 0 using template query55.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query56.q b/ql/src/test/queries/clientpositive/perf/cbo_query56.q new file mode 100644 index 0000000000..15dbc105f5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query56.q @@ -0,0 +1,70 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query56.tpl and seed 1951559352 +explain cbo +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk 
+ and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100; + +-- end query 1 in stream 0 using template query56.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query57.q b/ql/src/test/queries/clientpositive/perf/cbo_query57.q new file mode 100644 index 0000000000..da23fd2a56 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query57.q @@ -0,0 +1,50 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query57.tpl and seed 2031708268 +explain cbo +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100; + +-- end query 1 in stream 0 using template query57.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query58.q b/ql/src/test/queries/clientpositive/perf/cbo_query58.q new file mode 100644 index 0000000000..bbc1b96882 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query58.q @@ -0,0 +1,67 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query58.tpl and seed 1819994127 +explain cbo +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + 
,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100; + +-- end query 1 in stream 0 using template query58.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query59.q b/ql/src/test/queries/clientpositive/perf/cbo_query59.q new file mode 100644 index 0000000000..9517f71870 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query59.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query59.tpl and seed 1819994127 +explain cbo +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + 
,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100; + +-- end query 1 in stream 0 using template query59.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query6.q b/ql/src/test/queries/clientpositive/perf/cbo_query6.q new file mode 100644 index 0000000000..d9ed084682 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query6.q @@ -0,0 +1,30 @@ +set hive.auto.convert.join=true; +set hive.tez.cartesian-product.enabled=true; +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query6.tpl and seed 1819994127 +explain cbo +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select
avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100; + +-- end query 1 in stream 0 using template query6.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query60.q b/ql/src/test/queries/clientpositive/perf/cbo_query60.q new file mode 100644 index 0000000000..71c33499ee --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query60.q @@ -0,0 +1,80 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query60.tpl and seed 1930872976 +explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + 
,total_sales + limit 100; + +-- end query 1 in stream 0 using template query60.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query61.q b/ql/src/test/queries/clientpositive/perf/cbo_query61.q new file mode 100644 index 0000000000..83c95bffa5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query61.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query61.tpl and seed 1930872976 +explain cbo +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100; + +-- end query 1 in stream 0 using template query61.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query63.q b/ql/src/test/queries/clientpositive/perf/cbo_query63.q new file mode 100644 index 0000000000..0553f4c931 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query63.q @@ -0,0 +1,31 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query63.tpl 
and seed 1819994127 +explain cbo +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100; + +-- end query 1 in stream 0 using template query63.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query64.q b/ql/src/test/queries/clientpositive/perf/cbo_query64.q new file mode 100644 index 0000000000..24f3d856ed --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query64.q @@ -0,0 +1,121 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query64.tpl and seed 1220860970 +explain cbo +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + 
,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + 
,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt; + +-- end query 1 in stream 0 using template query64.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query65.q b/ql/src/test/queries/clientpositive/perf/cbo_query65.q new file mode 100644 index 0000000000..e8b752dfc0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query65.q @@ -0,0 +1,31 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query65.tpl and seed 1819994127 +explain cbo +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100; + +-- end query 1 in stream 0 using template query65.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query66.q b/ql/src/test/queries/clientpositive/perf/cbo_query66.q new file mode 100644 
index 0000000000..518293554d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query66.q @@ -0,0 +1,224 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query66.tpl and seed 2042478054 +explain cbo +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then 
ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 
end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) 
as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100; + +-- end query 1 in stream 0 using template query66.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query67.q b/ql/src/test/queries/clientpositive/perf/cbo_query67.q new file mode 100644 index 0000000000..5781aacd0a --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query67.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query67.tpl and 
seed 1819994127 +explain cbo +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100; + +-- end query 1 in stream 0 using template query67.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query68.q b/ql/src/test/queries/clientpositive/perf/cbo_query68.q new file mode 100644 index 0000000000..520b9d77fb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query68.q @@ -0,0 +1,44 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query68.tpl and seed 803547492 +explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + 
household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100; + +-- end query 1 in stream 0 using template query68.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query69.q b/ql/src/test/queries/clientpositive/perf/cbo_query69.q new file mode 100644 index 0000000000..01183fba76 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query69.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query69.tpl and seed 797269820 +explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + 
cd_credit_rating + limit 100; + +-- end query 1 in stream 0 using template query69.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query7.q b/ql/src/test/queries/clientpositive/perf/cbo_query7.q new file mode 100644 index 0000000000..3fb7130fff --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query7.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query7.tpl and seed 1930872976 +explain cbo +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100; + +-- end query 1 in stream 0 using template query7.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query70.q b/ql/src/test/queries/clientpositive/perf/cbo_query70.q new file mode 100644 index 0000000000..5b8b76e315 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query70.q @@ -0,0 +1,40 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query70.tpl and seed 1819994127 +explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() 
over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100; + +-- end query 1 in stream 0 using template query70.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query71.q b/ql/src/test/queries/clientpositive/perf/cbo_query71.q new file mode 100644 index 0000000000..e4d4df5443 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query71.q @@ -0,0 +1,42 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query71.tpl and seed 2031708268 +explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + ; + +-- end query 1 in stream 0 using template query71.tpl diff 
--git a/ql/src/test/queries/clientpositive/perf/cbo_query72.q b/ql/src/test/queries/clientpositive/perf/cbo_query72.q
new file mode 100644
index 0000000000..941a643d7f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query72.q
@@ -0,0 +1,37 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query72.tpl and seed 2031708268
+-- Per item description, warehouse name and sold-date week: counts catalog
+-- sales lines whose same-week inventory on hand is below the quantity sold
+-- and whose ship date is greater than the sold date + 5, split into lines
+-- without (no_promo) and with (promo) a matching promotion row.
+-- no_promo/promo use sum(): count() over a CASE that always yields 1 or 0
+-- counts every row, so both columns would merely repeat total_cnt.
+explain cbo
+select i_item_desc
+      ,w_warehouse_name
+      ,d1.d_week_seq
+      ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo
+      ,sum(case when p_promo_sk is not null then 1 else 0 end) promo
+      ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+  and inv_quantity_on_hand < cs_quantity
+  and d3.d_date > d1.d_date + 5
+  and hd_buy_potential = '1001-5000'
+  and d1.d_year = 2001
+  and cd_marital_status = 'M'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+limit 100;
+
+-- end query 1 in stream 0 using template query72.tpl
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query73.q b/ql/src/test/queries/clientpositive/perf/cbo_query73.q
new file mode 100644
index 0000000000..c78225efb2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query73.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query73.tpl and
seed 1971067816 +explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc; + +-- end query 1 in stream 0 using template query73.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query74.q b/ql/src/test/queries/clientpositive/perf/cbo_query74.q new file mode 100644 index 0000000000..71954c8781 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query74.q @@ -0,0 +1,63 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query74.tpl and seed 1556717815 +explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name 
customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100; + +-- end query 1 in stream 0 using template query74.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query75.q b/ql/src/test/queries/clientpositive/perf/cbo_query75.q new file mode 100644 index 0000000000..2d92853feb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query75.q @@ -0,0 +1,72 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query75.tpl and seed 1819994127 +explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + 
,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS 
DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100; + +-- end query 1 in stream 0 using template query75.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query76.q b/ql/src/test/queries/clientpositive/perf/cbo_query76.q new file mode 100644 index 0000000000..9d2d89efab --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query76.q @@ -0,0 +1,26 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query76.tpl and seed 2031708268 +explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100; + +-- end query 1 in stream 0 using template query76.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query77.q b/ql/src/test/queries/clientpositive/perf/cbo_query77.q new file mode 100644 index 0000000000..3d0f484226 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query77.q @@ -0,0 +1,109 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query77.tpl and seed 1819994127 +explain cbo +with ss as + (select 
s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store 
channel' as channel
+        , ss.s_store_sk as id
+        , sales
+        , coalesce(returns, 0) as returns
+        , (profit - coalesce(profit_loss,0)) as profit
+ from ss left join sr
+      on ss.s_store_sk = sr.s_store_sk
+ union all
+ select 'catalog channel' as channel
+        , cs_call_center_sk as id
+        , sales
+        , returns
+        , (profit - profit_loss) as profit
+ from cs
+      , cr
+ union all
+ select 'web channel' as channel
+        , ws.wp_web_page_sk as id
+        , sales
+        , coalesce(returns, 0) returns
+        , (profit - coalesce(profit_loss,0)) as profit
+ from ws left join wr
+      on ws.wp_web_page_sk = wr.wp_web_page_sk
+ ) x
+ group by rollup (channel, id)
+ order by channel
+         ,id
+ limit 100;
+
+-- end query 1 in stream 0 using template query77.tpl
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query78.q b/ql/src/test/queries/clientpositive/perf/cbo_query78.q
new file mode 100644
index 0000000000..87f94dcc4a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query78.q
@@ -0,0 +1,65 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query78.tpl and seed 1819994127
+-- Per sold year, item and customer, totals (quantity, wholesale cost, sales
+-- price) of web (ws), catalog (cs) and store (ss) sales lines that have no
+-- matching returns row. The final select left-joins the web and catalog
+-- totals to the store totals on year, item and customer, so each join
+-- condition compares the channel's own key with the corresponding ss key.
+explain cbo
+with ws as
+  (select d_year AS ws_sold_year, ws_item_sk,
+    ws_bill_customer_sk ws_customer_sk,
+    sum(ws_quantity) ws_qty,
+    sum(ws_wholesale_cost) ws_wc,
+    sum(ws_sales_price) ws_sp
+   from web_sales
+   left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk
+   join date_dim on ws_sold_date_sk = d_date_sk
+   where wr_order_number is null
+   group by d_year, ws_item_sk, ws_bill_customer_sk
+   ),
+cs as
+  (select d_year AS cs_sold_year, cs_item_sk,
+    cs_bill_customer_sk cs_customer_sk,
+    sum(cs_quantity) cs_qty,
+    sum(cs_wholesale_cost) cs_wc,
+    sum(cs_sales_price) cs_sp
+   from catalog_sales
+   left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk
+   join date_dim on cs_sold_date_sk = d_date_sk
+   where cr_order_number is null
+   group by d_year, cs_item_sk, cs_bill_customer_sk
+   ),
+ss as
+  (select d_year AS ss_sold_year, ss_item_sk,
+    ss_customer_sk,
+    sum(ss_quantity) ss_qty,
+    sum(ss_wholesale_cost) ss_wc,
+    sum(ss_sales_price) ss_sp
+   from store_sales
+   left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk
+   join date_dim on ss_sold_date_sk = d_date_sk
+   where sr_ticket_number is null
+   group by d_year, ss_item_sk, ss_customer_sk
+   )
+ select
+ss_sold_year, ss_item_sk, ss_customer_sk,
+round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio,
+ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price,
+coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty,
+coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost,
+coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price
+from ss
+left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk)
+left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk)
+where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000
+order by
+  ss_sold_year, ss_item_sk, ss_customer_sk,
+  ss_qty desc, ss_wc desc, ss_sp desc,
+  other_chan_qty,
+  other_chan_wholesale_cost,
+  other_chan_sales_price,
+  round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2)
+limit 100;
+
+-- end query 1 in stream 0 using template query78.tpl
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query79.q b/ql/src/test/queries/clientpositive/perf/cbo_query79.q
new file mode 100644
index 0000000000..ce05a9a220
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query79.q
@@ -0,0 +1,25 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query79.tpl and seed 2031708268
+explain cbo
+select
+  c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit
+  from
+   (select ss_ticket_number
+          ,ss_customer_sk
+          ,store.s_city
+          ,sum(ss_coupon_amt) amt
+          ,sum(ss_net_profit) profit
+    from store_sales,date_dim,store,household_demographics
+    where store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and
store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100; + +-- end query 1 in stream 0 using template query79.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query8.q b/ql/src/test/queries/clientpositive/perf/cbo_query8.q new file mode 100644 index 0000000000..71ab23418a --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query8.q @@ -0,0 +1,110 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query8.tpl and seed 1766988859 +explain cbo +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + 
'39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + 
'83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100; + +-- end query 1 in stream 0 using template query8.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query80.q b/ql/src/test/queries/clientpositive/perf/cbo_query80.q new file mode 100644 index 0000000000..16b1da2b09 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query80.q @@ -0,0 +1,98 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query80.tpl and seed 1819994127 +explain cbo +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as 
date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x 
+ group by rollup (channel, id) + order by channel + ,id + limit 100; + +-- end query 1 in stream 0 using template query80.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query81.q b/ql/src/test/queries/clientpositive/perf/cbo_query81.q new file mode 100644 index 0000000000..722bc9e6f3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query81.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query81.tpl and seed 1819994127 +explain cbo +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100; + +-- end query 1 in stream 0 using template query81.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query82.q b/ql/src/test/queries/clientpositive/perf/cbo_query82.q new file mode 100644 index 0000000000..932a71bf99 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query82.q @@ -0,0 +1,19 @@ +set 
hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query82.tpl and seed 55585014 +explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100; + +-- end query 1 in stream 0 using template query82.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query83.q b/ql/src/test/queries/clientpositive/perf/cbo_query83.q new file mode 100644 index 0000000000..551ea0da0d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query83.q @@ -0,0 +1,69 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query83.tpl and seed 1930872976 +explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + 
(select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100; + +-- end query 1 in stream 0 using template query83.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query84.q b/ql/src/test/queries/clientpositive/perf/cbo_query84.q new file mode 100644 index 0000000000..d36df57450 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query84.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query84.tpl and seed 1819994127 +explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100; + +-- end query 1 in stream 0 using template query84.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query85.q b/ql/src/test/queries/clientpositive/perf/cbo_query85.q new file mode 100644 index 0000000000..e09137be9b --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/perf/cbo_query85.q @@ -0,0 +1,86 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query85.tpl and seed 622697896 +explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + 
,avg(wr_fee) +limit 100; + +-- end query 1 in stream 0 using template query85.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query86.q b/ql/src/test/queries/clientpositive/perf/cbo_query86.q new file mode 100644 index 0000000000..a8e9941cd0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query86.q @@ -0,0 +1,28 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query86.tpl and seed 1819994127 +explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100; + +-- end query 1 in stream 0 using template query86.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query87.q b/ql/src/test/queries/clientpositive/perf/cbo_query87.q new file mode 100644 index 0000000000..79f8437ce7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query87.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query87.tpl and seed 1819994127 +explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and 
catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +; + +-- end query 1 in stream 0 using template query87.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query88.q b/ql/src/test/queries/clientpositive/perf/cbo_query88.q new file mode 100644 index 0000000000..09f95b980a --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query88.q @@ -0,0 +1,96 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query88.tpl and seed 318176889 +explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and 
household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, 
store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +; + +-- end query 1 in stream 0 using template query88.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query89.q b/ql/src/test/queries/clientpositive/perf/cbo_query89.q new file mode 100644 index 0000000000..768240419a --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/perf/cbo_query89.q @@ -0,0 +1,30 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query89.tpl and seed 1719819282 +explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100; + +-- end query 1 in stream 0 using template query89.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query9.q b/ql/src/test/queries/clientpositive/perf/cbo_query9.q new file mode 100644 index 0000000000..46d8868ac6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query9.q @@ -0,0 +1,53 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query9.tpl and seed 1490436826 +explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where 
ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +; + +-- end query 1 in stream 0 using template query9.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query90.q b/ql/src/test/queries/clientpositive/perf/cbo_query90.q new file mode 100644 index 0000000000..fc75e67a8e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query90.q @@ -0,0 +1,24 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query90.tpl and seed 2031708268 +explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + 
( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100; + +-- end query 1 in stream 0 using template query90.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query91.q b/ql/src/test/queries/clientpositive/perf/cbo_query91.q new file mode 100644 index 0000000000..7f1f8d8163 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query91.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query91.tpl and seed 1930872976 +explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc; + +-- end query 1 in stream 0 using template query91.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query92.q b/ql/src/test/queries/clientpositive/perf/cbo_query92.q new file mode 100644 index 0000000000..23b32eaf68 --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/perf/cbo_query92.q @@ -0,0 +1,32 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query92.tpl and seed 2031708268 +explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100; + +-- end query 1 in stream 0 using template query92.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query93.q b/ql/src/test/queries/clientpositive/perf/cbo_query93.q new file mode 100644 index 0000000000..c6a7d12caf --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query93.q @@ -0,0 +1,20 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query93.tpl and seed 1200409435 +explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100; + +-- end query 1 in stream 0 using template query93.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query94.q b/ql/src/test/queries/clientpositive/perf/cbo_query94.q new file mode 100644 index 0000000000..4ae01bf27e --- 
/dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query94.q @@ -0,0 +1,31 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query94.tpl and seed 2031708268 +explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100; + +-- end query 1 in stream 0 using template query94.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query95.q b/ql/src/test/queries/clientpositive/perf/cbo_query95.q new file mode 100644 index 0000000000..58c9be5047 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query95.q @@ -0,0 +1,34 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query95.tpl and seed 2031708268 +explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and 
ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100; + +-- end query 1 in stream 0 using template query95.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query96.q b/ql/src/test/queries/clientpositive/perf/cbo_query96.q new file mode 100644 index 0000000000..1561fe3eb3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query96.q @@ -0,0 +1,18 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query96.tpl and seed 1819994127 +explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100; + +-- end query 1 in stream 0 using template query96.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query97.q b/ql/src/test/queries/clientpositive/perf/cbo_query97.q new file mode 100644 index 0000000000..cb754dd3fe --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query97.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query97.tpl and seed 1819994127 +explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and 
d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100; + +-- end query 1 in stream 0 using template query97.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query98.q b/ql/src/test/queries/clientpositive/perf/cbo_query98.q new file mode 100644 index 0000000000..a48f81e11f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query98.q @@ -0,0 +1,34 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query98.tpl and seed 345591136 +explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio; + +-- end query 1 in stream 0 using template query98.tpl diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query99.q b/ql/src/test/queries/clientpositive/perf/cbo_query99.q new file mode 100644 index 0000000000..be098fa1c8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query99.q @@ -0,0 +1,37 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template 
query99.tpl and seed 1819994127 +explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100; + +-- end query 1 in stream 0 using template query99.tpl diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out new file mode 100644 index 0000000000..295ba99d53 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and 
s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1]) + HiveJoin(condition=[AND(=($3, $8), >($4, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], 
sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[CAST(_UTF-16LE'NM'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out new file mode 100644 index 0000000000..b226905d4e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out @@ -0,0 +1,178 @@ +PREHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + 
cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland 
County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=[$8], sort6=[$10], sort7=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], fetch=[100]) + HiveProject(cd_gender=[$0], cd_marital_status=[$1], cd_education_status=[$2], cnt1=[$8], cd_purchase_estimate=[$3], cnt2=[$8], cd_credit_rating=[$4], cnt3=[$8], cd_dep_count=[$5], cnt4=[$8], cd_dep_employed_count=[$6], cnt5=[$8], cd_dep_college_count=[$7], cnt6=[$8]) + HiveAggregate(group=[{6, 7, 8, 9, 10, 11, 12, 13}], agg#0=[count()]) + HiveFilter(condition=[OR(IS NOT NULL($15), IS NOT NULL($17))]) + 
HiveJoin(condition=[=($0, $16)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $14)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IN($7, _UTF-16LE'Walker County', _UTF-16LE'Richland County', _UTF-16LE'Gaines County', _UTF-16LE'Douglas County', _UTF-16LE'Dona Ana County'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out new file mode 100644 index 0000000000..e09f7ecf7b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + 
,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country 
customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag 
+limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_preferred_cust_flag=[$1]) + HiveJoin(condition=[AND(=($0, $5), CASE(CAST(IS NOT NULL($6)):BOOLEAN, CASE(CAST(IS NOT NULL($8)):BOOLEAN, >(/($4, $8), /($2, $6)), >(null, /($2, $6))), CASE(CAST(IS NOT NULL($8)):BOOLEAN, >(/($4, $8), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f9=[-($11, $10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_list_price=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{0, 1, 
2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[-($11, $10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f9=[$7]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f9=[-($11, $10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_list_price=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[-($11, $10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out new file mode 100644 index 0000000000..dbfe6e72b4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + 
,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 
3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out new file mode 100644 index 0000000000..ccad0880c4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -0,0 +1,141 @@ +PREHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + 
(ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state 
in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) + HiveAggregate(group=[{}], agg#0=[sum($16)], agg#1=[count($16)], agg#2=[sum($18)], agg#3=[count($18)], agg#4=[sum($19)], agg#5=[count($19)]) + HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $17, 100, 150), =($7, 3)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $17, 50, 100), =($7, 1)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $17, 150, 200), =($7, 1))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 50, 250))))], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3]) + HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_net_profit=[$22]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 100, 200), BETWEEN(false, $22, 150, 300), BETWEEN(false, $22, 50, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out new file mode 100644 index 0000000000..9bb4f2e7f2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out @@ -0,0 +1,618 @@ +Warning: Shuffle Join MERGEJOIN[1446][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1458][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1448][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1471][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1450][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1484][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +PREHOOK: query: explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price 
list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, 
i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + 
,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], 
groups=[[{0, 1, 2, 3}, {0, 1, 2}, {0, 1}, {0}, {}]], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], sales=[$4], number_sales=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store'], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + 
HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + 
HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + 
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], 
agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], 
list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + 
HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web'], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + 
HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{5, 6, 7}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], 
d_year=[CAST(2000):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + 
HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 1998, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out new file mode 100644 index 0000000000..02ad7c207c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out @@ -0,0 +1,69 @@ +PREHOOK: query: explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($7)]) + HiveJoin(condition=[AND(=($6, $0), OR(IN(substr($4, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', 
_UTF-16LE'80348', _UTF-16LE'81792'), >($7, 500), IN($3, _UTF-16LE'CA', _UTF-16LE'WA', _UTF-16LE'GA')))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_sales_price=[$2], d_date_sk=[$3], d_year=[$4], d_qoy=[$5]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(2):INTEGER]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out new file mode 100644 index 0000000000..e7d1199638 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = 
ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + 
HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col cs_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'NY'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($10), IS NOT NULL($11), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00, 2001-05-31 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + 
HiveFilter(condition=[AND(IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(cs_warehouse_sk=[$14], cs_order_number=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($14))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) + HiveProject(cr_order_number0=[$0], $f1=[true]) + HiveAggregate(group=[{16}]) + HiveFilter(condition=[IS NOT NULL($16)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out new file mode 100644 index 0000000000..6c5b48033c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out @@ -0,0 +1,141 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = 
ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and 
d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_state=[$2], store_sales_quantitycount=[$3], store_sales_quantityave=[/(CAST($4):DOUBLE, $3)], store_sales_quantitystdev=[POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5)], store_sales_quantitycov=[/(POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5), /(CAST($4):DOUBLE, $3))], as_store_returns_quantitycount=[$7], as_store_returns_quantityave=[/(CAST($8):DOUBLE, $7)], as_store_returns_quantitystdev=[POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5)], store_returns_quantitycov=[/(POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5), /(CAST($8):DOUBLE, $7))], catalog_sales_quantitycount=[$11], catalog_sales_quantityave=[/(CAST($12):DOUBLE, $11)], catalog_sales_quantitystdev=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))], catalog_sales_quantitycov=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[sum($3)], agg#2=[sum($7)], agg#3=[sum($6)], agg#4=[count($4)], agg#5=[sum($4)], agg#6=[sum($9)], agg#7=[sum($8)], agg#8=[count($5)], agg#9=[sum($5)], 
agg#10=[sum($11)], agg#11=[sum($10)]) + HiveProject($f0=[$9], $f1=[$10], $f2=[$25], $f3=[$5], $f4=[$21], $f5=[$14], $f30=[CAST($5):DOUBLE], $f7=[*(CAST($5):DOUBLE, CAST($5):DOUBLE)], $f40=[CAST($21):DOUBLE], $f9=[*(CAST($21):DOUBLE, CAST($21):DOUBLE)], $f50=[CAST($14):DOUBLE], $f11=[*(CAST($14):DOUBLE, CAST($14):DOUBLE)]) + HiveJoin(condition=[=($24, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $19), =($1, $18)), =($4, $20))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_quarter_name=[CAST(_UTF-16LE'2000Q1'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($15, _UTF-16LE'2000Q1'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_quantity=[$3], d_date_sk=[$4], d_quarter_name=[$5], sr_returned_date_sk=[$6], sr_item_sk=[$7], sr_customer_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10], d_date_sk0=[$11], d_quarter_name0=[$12]) + HiveJoin(condition=[AND(=($8, $1), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_quarter_name=[$15]) + HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5], d_quarter_name=[$6]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_quarter_name=[$15]) + HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out new file mode 100644 index 0000000000..7e931c5141 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out @@ -0,0 +1,114 @@ +PREHOOK: query: explain cbo +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) 
agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + 
c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[/($4, $5)], $f5=[/($6, $7)], $f6=[/($8, $9)], $f7=[/($10, $11)], $f8=[/($12, $13)], $f9=[/($14, $15)], $f10=[/($16, $17)]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2, 3}, {0, 1, 2}, {0, 1}, {0}, {}]], agg#0=[sum($4)], agg#1=[count($4)], agg#2=[sum($5)], agg#3=[count($5)], agg#4=[sum($6)], agg#5=[count($6)], agg#6=[sum($7)], agg#7=[count($7)], agg#8=[sum($8)], agg#9=[count($8)], agg#10=[sum($9)], agg#11=[count($9)], agg#12=[sum($10)], agg#13=[count($10)]) + HiveProject($f0=[$11], $f1=[$8], $f2=[$7], $f3=[$6], $f4=[CAST($16):DECIMAL(12, 2)], $f5=[CAST($17):DECIMAL(12, 2)], $f6=[CAST($19):DECIMAL(12, 2)], $f7=[CAST($18):DECIMAL(12, 2)], $f8=[CAST($20):DECIMAL(12, 2)], $f9=[CAST($4):DECIMAL(12, 2)], $f10=[CAST($26):DECIMAL(12, 2)]) + HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], c_birth_month=[$12], c_birth_year=[$13]) + HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($0), IS NOT NULL($2), IS NOT 
NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], cs_sold_date_sk=[$2], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$5], cs_quantity=[$6], cs_list_price=[$7], cs_sales_price=[$8], cs_coupon_amt=[$9], cs_net_profit=[$10], d_date_sk=[$11], d_year=[$12], cd_demo_sk=[$13], cd_gender=[$14], cd_education_status=[$15], cd_dep_count=[$16]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], cd_gender=[CAST(_UTF-16LE'M'):VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_education_status=[CAST(_UTF-16LE'College'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_dep_count=[$6]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($3, _UTF-16LE'College'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query19.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query19.q.out new file mode 100644 index 0000000000..c24d76f1c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query19.q.out @@ -0,0 +1,92 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> 
substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], i_manufact_id=[$2], i_manufact=[$3], ext_price=[$4]) + HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$6], sort3=[$2], sort4=[$3], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(brand_id=[$0], brand=[$1], i_manufact_id=[$2], i_manufact=[$3], ext_price=[$4], (tok_table_or_col i_brand)=[$1], (tok_table_or_col i_brand_id)=[$0]) + HiveAggregate(group=[{13, 14, 15, 16}], agg#0=[sum($8)]) + HiveJoin(condition=[AND(<>(substr($3, 1, 5), substr($19, 1, 5)), =($7, $18))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], d_year=[$6], d_moy=[$7], i_item_sk=[$8], i_brand_id=[$9], i_brand=[$10], i_manufact_id=[$11], i_manufact=[$12], i_manager_id=[$13]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], 
cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($8, 11), =($6, 1999), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manufact_id=[$13], i_manufact=[$14], i_manager_id=[CAST(7):INTEGER]) + HiveFilter(condition=[AND(=($20, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], s_zip=[$25]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out new file mode 100644 index 0000000000..cca252eb3b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when 
(d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when 
(d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC]) + HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $11), 2)], _o__c2=[round(/($2, $12), 2)], _o__c3=[round(/($3, $13), 2)], _o__c4=[round(/($4, $14), 2)], _o__c5=[round(/($5, $15), 2)], _o__c6=[round(/($6, $16), 2)], _o__c7=[round(/($7, $17), 2)]) + HiveJoin(condition=[=($0, -($10, 53))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) + HiveProject($f0=[$3], $f1=[CASE(=($4, _UTF-16LE'Sunday'), $1, null)], $f2=[CASE(=($4, _UTF-16LE'Monday'), $1, null)], $f3=[CASE(=($4, _UTF-16LE'Tuesday'), $1, null)], $f4=[CASE(=($4, _UTF-16LE'Wednesday'), $1, null)], $f5=[CASE(=($4, _UTF-16LE'Thursday'), $1, null)], $f6=[CASE(=($4, _UTF-16LE'Friday'), $1, null)], $f7=[CASE(=($4, _UTF-16LE'Saturday'), $1, null)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$1]) + HiveUnion(all=[true]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(cs_sold_date_sk=[$0], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], d_day_name=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq2=[$0], sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], 
agg#6=[sum($7)]) + HiveProject($f0=[$3], $f1=[CASE(=($4, _UTF-16LE'Sunday'), $1, null)], $f2=[CASE(=($4, _UTF-16LE'Monday'), $1, null)], $f3=[CASE(=($4, _UTF-16LE'Tuesday'), $1, null)], $f4=[CASE(=($4, _UTF-16LE'Wednesday'), $1, null)], $f5=[CASE(=($4, _UTF-16LE'Thursday'), $1, null)], $f6=[CASE(=($4, _UTF-16LE'Friday'), $1, null)], $f7=[CASE(=($4, _UTF-16LE'Saturday'), $1, null)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$1]) + HiveUnion(all=[true]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(cs_sold_date_sk=[$0], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], d_day_name=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out new file mode 100644 index 0000000000..834c804dd7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + 
and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out new file mode 100644 index 0000000000..a54a085ed0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out @@ -0,0 +1,90 @@ +PREHOOK: query: explain cbo +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveFilter(condition=[CASE(>($2, 0), BETWEEN(false, /(CAST($3):DOUBLE, CAST($2):DOUBLE), 6.66667E-1, 1.5E0), null)]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveProject($f0=[$1], $f1=[$9], $f2=[CASE(<(CAST($7):DATE, 1998-04-08), $5, 0)], $f3=[CASE(>=(CAST($7):DATE, 1998-04-08), $5, 0)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($6, $1)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00, 1998-05-08 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_current_price=[$5]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99, 1.49), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out new file mode 100644 index 0000000000..c5118ee62f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out @@ -0,0 +1,72 @@ +PREHOOK: query: explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + 
and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$4], sort1=[$0], sort2=[$1], sort3=[$2], sort4=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject($f0=[$3], $f1=[$0], $f2=[$1], $f3=[$2], $f4=[/(CAST($4):DOUBLE, $5)]) + HiveAggregate(group=[{1, 2, 3, 4}], groups=[[{1, 2, 3, 4}, {1, 2, 4}, {1, 4}, {4}, {}]], agg#0=[sum($8)], agg#1=[count($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($2))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out new file mode 100644 index 0000000000..baf790e064 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -0,0 +1,281 @@ +Warning: Shuffle Join MERGEJOIN[589][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[590][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[592][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +PREHOOK: query: explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales 
+ ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and 
ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($0)]) + HiveProject(sales=[$0]) + HiveUnion(all=[true]) + HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f1=[$0]) + HiveAggregate(group=[{1}]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveFilter(condition=[>($3, 4)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject($f0=[substr($6, 1, 30)], $f1=[$5], $f2=[$3]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT 
NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f1=[$0]) + HiveAggregate(group=[{1}]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveFilter(condition=[>($3, 4)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject($f0=[substr($6, 1, 30)], $f1=[$5], $f2=[$3]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out new file mode 100644 index 0000000000..53220d2b0e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out @@ -0,0 +1,171 @@ +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = 
c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) + HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)]) + HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{0, 6, 7, 15, 16, 18, 19, 21, 23}], agg#0=[sum($13)]) + HiveJoin(condition=[AND(=($8, UPPER($2)), =($24, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + 
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(_o__c0=[*(0.05, /($0, $1))]) + HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10]) + HiveAggregate(group=[{3, 4, 12, 14, 17, 18, 19, 20, 21, 22}], agg#0=[sum($10)]) + HiveJoin(condition=[AND(=($5, UPPER($24)), =($15, $23))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) 
+ HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out new file mode 100644 index 0000000000..88e0cf025e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: explain cbo 
+select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and 
sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 28, 29}], agg#0=[sum($8)], agg#1=[sum($23)], agg#2=[sum($15)]) + HiveJoin(condition=[=($27, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[AND(AND(=($2, $18), =($1, $17)), =($4, $19))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(4):INTEGER]) + HiveFilter(condition=[AND(=($8, 4), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[d1]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_net_profit=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_net_loss=[$11], d_date_sk0=[$12], d_year0=[$13], d_moy0=[$14]) + HiveJoin(condition=[AND(=($9, $1), =($8, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_net_loss=[$4], d_date_sk=[$5], d_year=[$6], d_moy=[$7]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out new file mode 100644 index 0000000000..8b36aed617 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) 
+ HiveProject($f0=[$0], $f1=[/(CAST($1):DOUBLE, $2)], $f2=[/($3, $4)], $f3=[/($5, $6)], $f4=[/($7, $8)]) + HiveAggregate(group=[{1}], agg#0=[sum($6)], agg#1=[count($6)], agg#2=[sum($7)], agg#3=[count($7)], agg#4=[sum($9)], agg#5=[count($9)], agg#6=[sum($8)], agg#7=[count($8)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($15), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0], cd_gender=[CAST(_UTF-16LE'F'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_marital_status=[CAST(_UTF-16LE'W'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_education_status=[CAST(_UTF-16LE'Primary'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'F'), =($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0], p_channel_email=[$9], 
p_channel_event=[$14]) + HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out new file mode 100644 index 0000000000..9a6718af9b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], s_state=[$1], g_state=[grouping($10, 0)], agg1=[/(CAST($2):DOUBLE, $3)], agg2=[/($4, $5)], agg3=[/($6, $7)], agg4=[/($8, $9)]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[count($2)], agg#2=[sum($3)], agg#3=[count($3)], agg#4=[sum($4)], agg#5=[count($4)], agg#6=[sum($5)], agg#7=[count($5)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$1], $f1=[$17], $f2=[$6], $f3=[$7], $f4=[$9], $f5=[$8]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0], cd_gender=[CAST(_UTF-16LE'M'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_marital_status=[CAST(_UTF-16LE'U'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_education_status=[CAST(_UTF-16LE'2 yr 
Degree'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out new file mode 100644 index 0000000000..643e5b6259 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out @@ -0,0 +1,146 @@ +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 
14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select 
avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f00=[$15], $f10=[$16], $f20=[$17], $f01=[$12], $f11=[$13], $f21=[$14], $f02=[$9], $f12=[$10], $f22=[$11], $f03=[$6], $f13=[$7], $f23=[$8], $f04=[$3], $f14=[$4], $f24=[$5]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 0, 5), OR(BETWEEN(false, $12, 11, 21), BETWEEN(false, $19, 460, 1460), BETWEEN(false, $11, 14, 34)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 26, 30), OR(BETWEEN(false, $12, 28, 38), BETWEEN(false, $19, 2513, 3513), BETWEEN(false, $11, 42, 62)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 21, 25), OR(BETWEEN(false, $12, 135, 145), BETWEEN(false, $19, 14180, 15180), BETWEEN(false, $11, 38, 58)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 16, 20), OR(BETWEEN(false, $12, 142, 152), BETWEEN(false, $19, 3054, 4054), BETWEEN(false, $11, 80, 100)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + 
HiveFilter(condition=[AND(BETWEEN(false, $10, 11, 15), OR(BETWEEN(false, $12, 66, 76), BETWEEN(false, $19, 920, 1920), BETWEEN(false, $11, 4, 24)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 6, 10), OR(BETWEEN(false, $12, 91, 101), BETWEEN(false, $19, 1430, 2430), BETWEEN(false, $11, 32, 52)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out new file mode 100644 index 0000000000..9f9ffe50cd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out @@ -0,0 +1,144 @@ +PREHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: 
Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{7, 8, 27, 28}], agg#0=[sum($14)], agg#1=[sum($22)], agg#2=[sum($3)]) + HiveJoin(condition=[AND(=($20, $1), =($19, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_quantity=[$8], d_date_sk=[$9], d_year=[$10], d_moy=[$11], sr_returned_date_sk=[$12], sr_item_sk=[$13], sr_customer_sk=[$14], sr_ticket_number=[$15], sr_return_quantity=[$16], d_date_sk0=[$17], d_year0=[$18], d_moy0=[$19], s_store_sk=[$20], s_store_id=[$21], s_store_name=[$22]) + HiveJoin(condition=[=($20, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[AND(AND(=($2, $11), =($1, $10)), =($4, $12))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(4):INTEGER]) + HiveFilter(condition=[AND(=($8, 4), =($6, 1999), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[d1]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5], d_year=[$6], d_moy=[$7]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 7), =($6, 1999), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query3.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query3.q.out new file mode 100644 index 0000000000..09c9bb78fe --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query3.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + 
from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$1], dir0=[ASC], dir1=[DESC-nulls-last], dir2=[ASC], fetch=[100]) + HiveProject(d_year=[$2], i_brand_id=[$0], i_brand=[$1], $f3=[$3]) + HiveAggregate(group=[{4, 5, 8}], agg#0=[sum($2)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manufact_id=[CAST(436):INTEGER]) + HiveFilter(condition=[AND(=($13, 436), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out new file mode 100644 index 0000000000..067b2bc8a3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: explain cbo +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + 
from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 
'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], sort11=[$11], sort12=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], dir10=[ASC], dir11=[ASC], dir12=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1], c_salutation=[$3], c_first_name=[$4], c_last_name=[$5], c_preferred_cust_flag=[$6], c_birth_day=[$7], c_birth_month=[$8], c_birth_year=[$9], c_birth_country=[$10], c_login=[$11], c_email_address=[$12], c_last_review_date=[$13], ctr_total_return=[$18]) + HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($14, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_current_addr_sk=[$4], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_day=[$11], c_birth_month=[$12], c_birth_year=[$13], c_birth_country=[$14], c_login=[$15], c_email_address=[$16], c_last_review_date=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'IL'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'IL'), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_state=[$0]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ca_state=[$0], wr_returning_customer_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) 
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out new file mode 100644 index 0000000000..5143053673 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: explain cbo +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 
then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 
then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(ca_county=[$0], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($9, $7)], store_q1_q2_increase=[/($1, $3)], web_q2_q3_increase=[/($11, $9)], store_q2_q3_increase=[/($5, $1)]) + HiveJoin(condition=[AND(AND(=($0, $6), CASE(>($3, 0), CASE(>($7, 0), >(/($9, $7), /($1, $3)), >(null, /($1, $3))), CASE(>($7, 0), >(/($9, $7), null), null))), CASE(>($1, 0), CASE(>($9, 0), >(/($11, $9), /($5, $1)), >(null, /($5, $1))), CASE(>($9, 0), >(/($11, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(2):INTEGER]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], 
agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(2):INTEGER]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query32.q.out new file mode 100644 index 0000000000..981d478d48 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query32.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = 
cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[AND(>($2, CAST(*(1.3, $6)):DECIMAL(14, 7)), =($7, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_discount_amt=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_item_sk=[$0], $f1=[$1], i_item_sk=[$2], i_manufact_id=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_discount_amt=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], 
d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_manufact_id=[CAST(269):INTEGER]) + HiveFilter(condition=[AND(=($13, 269), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out new file mode 100644 index 0000000000..b5c7f11411 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out @@ -0,0 +1,238 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id 
,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_addr_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], 
d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], cs_sold_date_sk=[$2], cs_bill_addr_sk=[$3], cs_item_sk=[$4], cs_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], 
agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ws_sold_date_sk=[$2], ws_item_sk=[$3], ws_bill_addr_sk=[$4], ws_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out new file mode 100644 index 0000000000..4e50a1ce37 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out @@ -0,0 +1,99 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + 
,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or 
+ household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[DESC-nulls-last]) + HiveProject(c_last_name=[$3], c_first_name=[$2], c_salutation=[$1], c_preferred_cust_flag=[$4], ss_ticket_number=[$5], cnt=[$7]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveFilter(condition=[BETWEEN(false, $2, 15, 20)]) + HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[count()]) + HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dom=[$9]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[$2], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), null), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0], s_county=[$23]) + HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County', _UTF-16LE'Fairfield County', _UTF-16LE'Jackson County', _UTF-16LE'Barrow County', _UTF-16LE'Pennington County'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out new file mode 100644 index 0000000000..2b8a4531b2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: explain cbo +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + 
avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + 
ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o__c4=[$4], _o__c5=[$5], _o__c6=[$6], cd_dep_employed_count=[$7], cnt2=[$8], _o__c9=[$9], _o__c10=[$10], _o__c11=[$11], cd_dep_college_count=[$12], cnt3=[$13], _o__c14=[$14], _o__c15=[$15], _o__c16=[$16]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$17], sort4=[$7], sort5=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], fetch=[100]) + HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$6], _o__c4=[/(CAST($7):DOUBLE, $8)], _o__c5=[$9], _o__c6=[$7], cd_dep_employed_count=[$4], cnt2=[$6], _o__c9=[/(CAST($10):DOUBLE, $11)], _o__c10=[$12], _o__c11=[$10], cd_dep_college_count=[$5], cnt3=[$6], _o__c14=[/(CAST($13):DOUBLE, $14)], _o__c15=[$15], _o__c16=[$13], (tok_table_or_col cd_dep_count)=[$3]) + HiveAggregate(group=[{4, 6, 7, 8, 9, 10}], agg#0=[count()], agg#1=[sum($8)], agg#2=[count($8)], agg#3=[max($8)], agg#4=[sum($9)], 
agg#5=[count($9)], agg#6=[max($9)], agg#7=[sum($10)], agg#8=[count($10)], agg#9=[max($10)]) + HiveFilter(condition=[OR(IS NOT NULL($12), IS NOT NULL($14))]) + HiveJoin(condition=[=($0, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $11)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $11)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_qoy=[$10]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], 
ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_qoy=[$10]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_qoy=[$10]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out new file mode 100644 index 0000000000..b2a713f5c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain cbo +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end 
+ ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(gross_margin=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(gross_margin=[/($2, $3)], i_category=[$0], i_class=[$1], lochierarchy=[+(grouping($4, 1), grouping($4, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($4, 1), grouping($4, 0)), CASE(=(grouping($4, 0), 0), $0, null) ORDER BY /($2, $3) NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col i_category))=[CASE(=(+(grouping($4, 1), grouping($4, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], 
GROUPING__ID=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$11], $f1=[$10], $f2=[$4], $f3=[$3]) + HiveJoin(condition=[=($9, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC', _UTF-16LE'AL', _UTF-16LE'GA'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query37.q.out new file mode 100644 index 0000000000..0edc78b871 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query37.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + 
and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) + HiveAggregate(group=[{2, 3, 4}]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($15)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IN($13, 678, 964, 918, 849), BETWEEN(false, $5, 22, 52), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$2], 
d_date_sk=[$3], d_date=[$4]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 100, 500), IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00, 2001-08-01 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out new file mode 100644 index 0000000000..9633df11a1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out @@ -0,0 +1,111 @@ +PREHOOK: query: explain cbo +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH 
### +POSTHOOK: query: explain cbo +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + 
HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) 
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out new file mode 100644 index 0000000000..fd3038e37c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out @@ -0,0 +1,168 @@ +PREHOOK: query: explain cbo +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: 
query: explain cbo +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[$2], cov=[$3], w_warehouse_sk1=[$4], i_item_sk1=[$5], d_moy1=[CAST(5):INTEGER], mean1=[$6], cov1=[$7]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$6], sort5=[$7], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC]) + HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[$2], cov=[$3], w_warehouse_sk0=[$4], i_item_sk0=[$5], mean0=[$6], cov0=[$7]) + HiveJoin(condition=[AND(=($1, $5), =($0, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), 
/(CAST($6):DOUBLE, $5)))]) + HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) + HiveJoin(condition=[=($3, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(4):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) + HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + 
HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) + HiveJoin(condition=[=($3, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(5):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 5), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + +PREHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = 
inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out new file mode 100644 index 0000000000..d796f8622c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out @@ -0,0 +1,324 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name 
customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select 
t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + 
,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total 
t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveJoin(condition=[AND(=($0, $7), CASE(CAST(IS NOT NULL($8)):BOOLEAN, CASE(CAST(IS NOT NULL($10)):BOOLEAN, >(/($4, $10), /($2, $8)), >(null, /($2, $8))), CASE(CAST(IS NOT NULL($10)):BOOLEAN, >(/($4, $10), null), 
null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f3=[$3], $f8=[$7]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_ext_list_price=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[AND(=($4, $0), CASE(CAST(IS NOT NULL($9)):BOOLEAN, CASE(CAST(IS NOT NULL($7)):BOOLEAN, >(/($1, $7), /($3, $9)), >(null, /($3, $9))), CASE(CAST(IS NOT NULL($7)):BOOLEAN, >(/($1, $7), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], 
c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_ext_discount_amt=[$22], cs_ext_sales_price=[$23], cs_ext_wholesale_cost=[$24], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_sales_price=[$23], ws_ext_wholesale_cost=[$24], 
ws_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_ext_list_price=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_ext_discount_amt=[$22], cs_ext_sales_price=[$23], cs_ext_wholesale_cost=[$24], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f6=[$7], $f8=[/(+(-(-($13, $12), $10), $11), CAST(2):DECIMAL(10, 0))]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_sales_price=[$23], ws_ext_wholesale_cost=[$24], ws_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + 
HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out new file mode 100644 index 0000000000..7d8fc9b18d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain cbo +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + 
(cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveProject($f0=[$1], $f1=[$13], $f2=[CASE(<(CAST($11):DATE, 1998-04-08), -($6, CASE(IS NOT NULL($9), $9, 0)), 0)], $f3=[CASE(>=(CAST($11):DATE, 1998-04-08), -($6, CASE(IS NOT NULL($9), $9, 0)), 0)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$0], w_state=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($10, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) + 
HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00, 1998-05-08 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_current_price=[$5]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99, 1.49), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query42.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query42.q.out new file mode 100644 index 0000000000..ddb32232e9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query42.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(d_year=[CAST(1998):INTEGER], i_category_id=[$0], i_category=[$1], _o__c3=[$2]) + HiveSortLimit(sort0=[$3], sort1=[$0], sort2=[$1], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC]) + HiveProject(i_category_id=[$0], i_category=[$1], _o__c3=[$2], (tok_function sum (tok_table_or_col ss_ext_sales_price))=[$2]) + HiveAggregate(group=[{7, 8}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + HiveProject(i_item_sk=[$0], i_category_id=[$11], i_category=[$12], i_manager_id=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out new file mode 100644 index 0000000000..f6eeae33ed --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price 
else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: 
+HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) + HiveProject($f0=[$8], $f1=[$7], $f2=[CASE(=($5, _UTF-16LE'Sunday'), $2, null)], $f3=[CASE(=($5, _UTF-16LE'Monday'), $2, null)], $f4=[CASE(=($5, _UTF-16LE'Tuesday'), $2, null)], $f5=[CASE(=($5, _UTF-16LE'Wednesday'), $2, null)], $f6=[CASE(=($5, _UTF-16LE'Thursday'), $2, null)], $f7=[CASE(=($5, _UTF-16LE'Friday'), $2, null)], $f8=[CASE(=($5, _UTF-16LE'Saturday'), $2, null)]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER], d_day_name=[$14]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5], s_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($27, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query44.q.out new file mode 100644 index 0000000000..73d4213656 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query44.q.out @@ -0,0 +1,115 
@@ +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain cbo +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk 
item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(rnk=[$3], best_performing=[$1], worst_performing=[$5]) + HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[i1]) + HiveProject(item_sk=[$0], rank_window_0=[$1]) + HiveFilter(condition=[AND(<($1, 11), IS NOT NULL($0))]) + HiveProject(item_sk=[$0], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveJoin(condition=[>($1, *(0.9, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{2}], agg#0=[sum($22)], agg#1=[count($22)]) + HiveFilter(condition=[=($7, 410)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[ss1]) + HiveProject($f1=[/($1, $2)]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveProject($f0=[true], $f1=[$22]) + HiveFilter(condition=[AND(=($7, 410), IS NULL($5))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$1], item_sk=[$2], 
rank_window_0=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[i2]) + HiveProject(item_sk=[$0], rank_window_0=[$1]) + HiveFilter(condition=[AND(<($1, 11), IS NOT NULL($0))]) + HiveProject(item_sk=[$0], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY $1 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveJoin(condition=[>($1, *(0.9, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{2}], agg#0=[sum($22)], agg#1=[count($22)]) + HiveFilter(condition=[=($7, 410)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[ss1]) + HiveProject($f1=[/($1, $2)]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveProject($f0=[true], $f1=[$22]) + HiveFilter(condition=[AND(=($7, 410), IS NULL($5))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query45.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query45.q.out new file mode 100644 index 0000000000..986dae94f5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query45.q.out @@ -0,0 +1,89 @@ +Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain cbo +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + 
and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ca_zip=[$1], ca_county=[$0], $f2=[$2]) + HiveAggregate(group=[{7, 8}], agg#0=[sum($3)]) + HiveFilter(condition=[OR(IN(substr($8, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', _UTF-16LE'80348', _UTF-16LE'81792'), CASE(=($14, 0), false, IS NOT NULL($17), true, IS NULL($13), null, <($15, $14), null, false))]) + HiveProject(ws_sold_date_sk=[$9], ws_item_sk=[$10], ws_bill_customer_sk=[$11], ws_sales_price=[$12], c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_zip=[$4], d_date_sk=[$13], d_year=[$14], d_qoy=[$15], i_item_sk=[$5], 
i_item_id=[$6], c=[$16], ck=[$17], i_item_id0=[$7], i1142=[$8]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2], i1142=[$3], ws_sold_date_sk=[$4], ws_item_sk=[$5], ws_bill_customer_sk=[$6], ws_sales_price=[$7], d_date_sk=[$8], d_year=[$9], d_qoy=[$10]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0], i1142=[true]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_qoy=[$6]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], 
d_qoy=[CAST(2):INTEGER]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c=[$0], ck=[$1]) + HiveAggregate(group=[{}], c=[COUNT()], ck=[COUNT($1)]) + HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out new file mode 100644 index 0000000000..0728056fe6 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out @@ -0,0 +1,115 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: 
default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(c_last_name=[$3], c_first_name=[$2], ca_city=[$5], bought_city=[$8], ss_ticket_number=[$6], amt=[$9], profit=[$10]) + HiveJoin(condition=[AND(<>($5, $8), =($7, $0))], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[current_addr]) + HiveProject(ss_ticket_number=[$3], ss_customer_sk=[$1], bought_city=[$0], amt=[$4], profit=[$5]) + HiveAggregate(group=[{1, 3, 5, 7}], agg#0=[sum($8)], agg#1=[sum($9)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dow=[$7]) + HiveFilter(condition=[AND(IN($7, 6, 0), IN($6, 1998, 1999, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_city=[$22]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park'), IS NOT 
NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out new file mode 100644 index 0000000000..11fd5773e9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when 
avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: 
default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], sum_sales=[$4], psum=[$5], nsum=[$6]) + HiveSortLimit(sort0=[$7], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($18, $19)]) + HiveJoin(condition=[AND(AND(AND(AND(=($12, $0), =($13, $1)), =($14, $2)), =($15, $3)), =($20, -($5, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], rank_window_1=[$5]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) 
+ HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[AND(AND(AND(AND(=($6, $0), =($7, $1)), =($8, $2)), =($9, $3)), =($14, +($5, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], rank_window_1=[$5]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + 
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0), CASE(>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1), null), IS NOT NULL($8))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $1, $0, $4, $5, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS 
BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out new file mode 100644 index 0000000000..feb4012a37 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: explain cbo +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + 
and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + 
and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[sum($13)]) + HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $15, 0, 2000)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $15, 150, 3000)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $15, 50, 25000))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[CAST(_UTF-16LE'M'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_education_status=[CAST(_UTF-16LE'4 yr Degree'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, 
_UTF-16LE'4 yr Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13], ss_net_profit=[$22]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 0, 2000), BETWEEN(false, $22, 150, 3000), BETWEEN(false, $22, 50, 25000)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out new file mode 100644 index 0000000000..9c31d615f9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + 
cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as 
dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) 
in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + 
and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveAggregate(group=[{0, 1, 2, 3, 4}]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveAggregate(group=[{0, 1, 2, 3, 4}]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'web'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)]) + HiveProject($f0=[$5], 
$f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], $f3=[CASE(IS NOT NULL($3), $3, 0)], $f4=[CASE(IS NOT NULL($8), $8, 0)]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[AND(>($15, 10000), IS NOT NULL($13), IS NOT NULL($2))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_net_paid=[$29], ws_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1), >($29, 0), >($18, 0), IS NOT NULL($17), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)]) + HiveProject($f0=[$5], $f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], 
$f3=[CASE(IS NOT NULL($3), $3, 0)], $f4=[CASE(IS NOT NULL($8), $8, 0)]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[AND(>($18, 10000), IS NOT NULL($16), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_net_paid=[$29], cs_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1), >($29, 0), >($18, 0), IS NOT NULL($17), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'store'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)]) + HiveProject($f0=[$5], $f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], $f3=[CASE(IS NOT NULL($3), $3, 0)], $f4=[CASE(IS NOT 
NULL($8), $8, 0)]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[AND(>($11, 10000), IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_net_paid=[$20], ss_net_profit=[$22]) + HiveFilter(condition=[AND(>($22, 1), >($20, 0), >($10, 0), IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out new file mode 100644 index 0000000000..a82f0119a2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out @@ -0,0 +1,342 @@ +PREHOOK: query: explain cbo +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where 
date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by 
web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + 
sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , 
(profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[||(_UTF-16LE'store', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{9}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(store_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(store_sk=[$7], date_sk=[$0], sales_price=[$15], profit=[$22], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(store_sk=[$7], date_sk=[$0], sales_price=[CAST(0):DECIMAL(7, 2)], profit=[CAST(0):DECIMAL(7, 2)], return_amt=[$11], net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)], agg#3=[sum($7)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[CAST(0):DECIMAL(7, 2)], profit=[CAST(0):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + 
HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{9}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(wsr_web_site_sk=[$13], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wsr_web_site_sk=[$1], date_sk=[$3], sales_price=[CAST(0):DECIMAL(7, 2)], profit=[CAST(0):DECIMAL(7, 2)], return_amt=[$6], net_loss=[$7]) + HiveJoin(condition=[AND(=($4, $0), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$3], ws_web_site_sk=[$13], ws_order_number=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($3), IS NOT NULL($17))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + 
HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out new file mode 100644 index 0000000000..2be59c1d4e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out @@ -0,0 +1,151 @@ +PREHOOK: query: explain cbo +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + 
,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: 
hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}], agg#0=[sum($10)], agg#1=[sum($11)], agg#2=[sum($12)], agg#3=[sum($13)], agg#4=[sum($14)]) + HiveProject($f0=[$14], $f1=[$15], $f2=[$16], $f3=[$17], $f4=[$18], $f5=[$19], $f6=[$20], $f7=[$21], $f8=[$22], $f9=[$23], $f10=[CASE(<=(-($5, $0), 30), 1, 0)], $f11=[CASE(AND(>(-($5, $0), 30), <=(-($5, $0), 60)), 1, 0)], $f12=[CASE(AND(>(-($5, $0), 60), <=(-($5, $0), 90)), 1, 0)], $f13=[CASE(AND(>(-($5, $0), 90), <=(-($5, $0), 120)), 1, 0)], $f14=[CASE(>(-($5, $0), 120), 1, 0)]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($4, $8), =($1, $6)), =($2, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + 
HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(9):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_id=[$16], s_street_number=[$18], s_street_name=[$19], s_street_type=[$20], s_suite_number=[$21], s_city=[$22], s_county=[$23], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out new file mode 100644 index 0000000000..fed8677737 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query51.q.out @@ -0,0 +1,125 @@ +PREHOOK: query: explain cbo +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding 
and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 
store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_sk=[$0], d_date=[$1], web_sales=[$2], store_sales=[$3], max_window_0=[$4], max_window_1=[$5]) + HiveFilter(condition=[>($4, $5)]) + HiveProject(item_sk=[CASE(IS NOT NULL($3), $3, $0)], d_date=[CASE(IS NOT NULL($4), $4, $1)], web_sales=[$5], store_sales=[$2], max_window_0=[max($5) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], max_window_1=[max($2) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 
1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) + HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) + HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query52.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query52.q.out new file mode 100644 index 0000000000..627b8b05d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query52.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + 
,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(d_year=[CAST(1998):INTEGER], brand_id=[$0], brand=[$1], ext_price=[$2]) + HiveSortLimit(sort0=[$2], sort1=[$0], dir0=[DESC-nulls-last], dir1=[ASC], fetch=[100]) + HiveProject(i_brand_id=[$0], i_brand=[$1], $f2=[$2]) + HiveAggregate(group=[{7, 8}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manager_id=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out new file mode 100644 index 0000000000..56b1ba286d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out @@ -0,0 +1,87 @@ 
+PREHOOK: query: explain cbo +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and 
+i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1), null)]) + HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_manufact_id=[$0], d_qoy=[$1], $f2=[$2]) + HiveAggregate(group=[{9, 12}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], 
i_category=[$12], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3], d_qoy=[$10]) + HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out new file mode 100644 index 0000000000..1a875e87df --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -0,0 +1,213 @@ +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: 
Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +PREHOOK: query: explain cbo +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: 
default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(segment=[$0], num_customers=[$1], 
segment_base=[*($0, 50)]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(segment=[CAST(/($1, CAST(50):DECIMAL(10, 0))):INTEGER]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveFilter(condition=[BETWEEN(false, $2, $3, $4)]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], ss_ext_sales_price=[$4], d_month_seq=[$11], _o__c0=[$13]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$10], $f1=[$11], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$5], ca_county=[$6], ca_state=[$7], s_county=[$8], s_state=[$9], d_date_sk=[$3], d_month_seq=[$4], cnt=[$12], $f00=[$13]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($8))]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) + HiveFilter(condition=[AND(=($8, 3), =($6, 1999), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_class=[CAST(_UTF-16LE'consignment'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_category=[CAST(_UTF-16LE'Jewelry'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query55.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query55.q.out new file mode 100644 index 0000000000..4182de0b57 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query55.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo 
+select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], ext_price=[$2]) + HiveSortLimit(sort0=[$2], sort1=[$3], dir0=[DESC-nulls-last], dir1=[ASC], fetch=[100]) + HiveProject(brand_id=[$0], brand=[$1], ext_price=[$2], (tok_table_or_col i_brand_id)=[$0]) + HiveAggregate(group=[{7, 8}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manager_id=[CAST(36):INTEGER]) + HiveFilter(condition=[AND(=($20, 36), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out new file mode 100644 index 0000000000..579d06a203 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out @@ -0,0 +1,224 @@ +PREHOOK: query: explain cbo +with ss as ( + select 
i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk 
= d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS 
NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_addr_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + 
HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], cs_sold_date_sk=[$2], cs_bill_addr_sk=[$3], cs_item_sk=[$4], cs_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ws_sold_date_sk=[$2], ws_item_sk=[$3], ws_bill_addr_sk=[$4], ws_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + 
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out new file mode 100644 index 0000000000..e0480e415b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out @@ -0,0 +1,171 @@ +PREHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 
v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_sales=[$4], sum_sales=[$5], psum=[$6], nsum=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($15, $16)]) + HiveJoin(condition=[AND(AND(AND(=($10, $0), =($11, $1)), =($12, $2)), =($17, -($4, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], rank_window_1=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + 
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveJoin(condition=[AND(AND(AND(=($5, $0), =($6, $1)), =($7, $2)), =($12, +($4, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], rank_window_1=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0), CASE(>($6, 0), >(/(ABS(-($5, $6)), $6), 0.1), null), IS NOT NULL($7))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $1, $0, $4, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS 
LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out new file mode 100644 index 0000000000..2504d78380 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out @@ -0,0 +1,240 @@ +Warning: Shuffle Join MERGEJOIN[404][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +PREHOOK: query: explain cbo +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where 
ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: 
hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))]) + HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $1, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, 
date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], 
cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out new file mode 100644 index 0000000000..bb92a1fcf7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: explain cbo +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + 
,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + 
,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$3], _o__c3=[/($5, $16)], _o__c4=[/($6, $17)], _o__c5=[/($7, $7)], _o__c6=[/($8, $18)], _o__c7=[/($9, $19)], _o__c8=[/($10, $20)], _o__c9=[/($11, $21)]) + HiveJoin(condition=[AND(=($1, $15), =($3, -($14, 52)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + 
HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE(=($5, _UTF-16LE'Sunday'), $2, null)], $f3=[CASE(=($5, _UTF-16LE'Monday'), $2, null)], $f4=[CASE(=($5, _UTF-16LE'Tuesday'), $2, null)], $f5=[CASE(=($5, _UTF-16LE'Wednesday'), $2, null)], $f6=[CASE(=($5, _UTF-16LE'Thursday'), $2, null)], $f7=[CASE(=($5, _UTF-16LE'Friday'), $2, null)], $f8=[CASE(=($5, _UTF-16LE'Saturday'), $2, null)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], d_day_name=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_month_seq=[$3], d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], 
$f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE(=($5, _UTF-16LE'Sunday'), $2, null)], $f3=[CASE(=($5, _UTF-16LE'Monday'), $2, null)], $f4=[CASE(=($5, _UTF-16LE'Tuesday'), $2, null)], $f5=[CASE(=($5, _UTF-16LE'Wednesday'), $2, null)], $f6=[CASE(=($5, _UTF-16LE'Thursday'), $2, null)], $f7=[CASE(=($5, _UTF-16LE'Friday'), $2, null)], $f8=[CASE(=($5, _UTF-16LE'Saturday'), $2, null)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], d_day_name=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_month_seq=[$3], d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out new file mode 100644 index 0000000000..02149e760d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out @@ -0,0 +1,109 @@ +Warning: Map Join MAPJOIN[171][bigTable=?] 
in task 'Reducer 15' is a cross product +PREHOOK: query: explain cbo +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(ca_state=[$0], $f1=[$1]) + HiveFilter(condition=[>=($1, 10)]) + HiveAggregate(group=[{9}], agg#0=[count()]) + HiveJoin(condition=[=($1, $10)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[s]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_state=[$3]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[a]) + HiveProject(i_item_sk=[$0], i_current_price=[$1], i_category=[$2], _o__c0=[$3], i_category0=[$4], cnt=[$5]) + HiveJoin(condition=[AND(=($4, $2), >($1, *(1.2, CAST($3):DECIMAL(16, 6))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, item]], table:alias=[i]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(_o__c0=[/($1, 
$2)], i_category=[$0]) + HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) + HiveFilter(condition=[IS NOT NULL($12)]) + HiveTableScan(table=[[default, item]], table:alias=[j]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out new file mode 100644 index 0000000000..81dfec798b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk 
+ and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Children'), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_addr_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + 
HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Children'), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], cs_sold_date_sk=[$2], cs_bill_addr_sk=[$3], cs_item_sk=[$4], cs_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], 
agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Children'), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[$1], ws_sold_date_sk=[$2], ws_item_sk=[$3], ws_bill_addr_sk=[$4], ws_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out new file mode 100644 index 0000000000..c5356dfe63 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out @@ -0,0 +1,164 @@ +Warning: Shuffle Join MERGEJOIN[266][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 5' is a cross product +PREHOOK: query: explain cbo +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 
'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4)), CAST(100):DECIMAL(10, 0))]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($9)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-7):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], 
ss_store_sk=[$3], ss_promo_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], d_year=[$7], d_moy=[$8], i_item_sk=[$9], i_category=[$10], s_store_sk=[$11], s_gmt_offset=[$12], p_promo_sk=[$13], p_channel_dmail=[$14], p_channel_email=[$15], p_channel_tv=[$16]) + HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8), IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_category=[CAST(_UTF-16LE'Electronics'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Electronics'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], s_gmt_offset=[CAST(-7):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($27, -7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(p_promo_sk=[$0], p_channel_dmail=[$8], p_channel_email=[$9], p_channel_tv=[$11]) + HiveFilter(condition=[AND(OR(=($8, _UTF-16LE'Y'), =($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], 
agg#0=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-7):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], d_year=[$6], d_moy=[$7], i_item_sk=[$8], i_category=[$9], s_store_sk=[$10], s_gmt_offset=[$11]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_category=[CAST(_UTF-16LE'Electronics'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Electronics'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], s_gmt_offset=[CAST(-7):DECIMAL(5, 2)]) + 
HiveFilter(condition=[AND(=($27, -7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out new file mode 100644 index 0000000000..a0809895eb --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = 
d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1), null)]) + HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_manager_id=[$0], d_moy=[$1], $f2=[$2]) + HiveAggregate(group=[{9, 12}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_manager_id=[$20]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3], d_moy=[$8]) + HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out new file mode 100644 index 0000000000..3b59bd8999 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out @@ -0,0 +1,438 @@ +PREHOOK: query: explain cbo +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = 
ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as 
sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + 
i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number=[$3], b_streen_name=[$4], b_city=[$5], b_zip=[$6], c_street_number=[$7], c_street_name=[$8], c_city=[$9], c_zip=[$10], syear=[CAST(2000):INTEGER], cnt=[$11], s1=[$12], s2=[$13], s3=[$14], s11=[$15], s21=[$16], s31=[$17], syear1=[CAST(2001):INTEGER], cnt1=[$18]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$18], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + HiveProject(product_name=[$0], store_name=[$2], 
store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6], b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10], c_zip=[$11], cnt=[$12], s1=[$13], s2=[$14], s3=[$15], s11=[$20], s21=[$21], s31=[$22], cnt1=[$19]) + HiveJoin(condition=[AND(AND(AND(=($1, $16), <=($19, $12)), =($2, $17)), =($3, $18))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$13], $f1=[$12], $f2=[$10], $f3=[$11], $f4=[$6], $f5=[$7], $f6=[$8], $f7=[$9], $f8=[$2], $f9=[$3], $f10=[$4], $f11=[$5], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{9, 11, 16, 17, 18, 19, 25, 26, 27, 28, 30, 31, 48, 51}], agg#0=[count()], agg#1=[sum($45)], agg#2=[sum($46)], agg#3=[sum($47)]) + HiveJoin(condition=[AND(<>($1, $21), =($39, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($36, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + 
HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], p_promo_sk=[$13], ss_sold_date_sk=[$14], ss_item_sk=[$15], ss_customer_sk=[$16], ss_cdemo_sk=[$17], ss_hdemo_sk=[$18], ss_addr_sk=[$19], ss_store_sk=[$20], ss_promo_sk=[$21], ss_ticket_number=[$22], ss_wholesale_cost=[$23], ss_list_price=[$24], ss_coupon_amt=[$25], i_item_sk=[$26], i_current_price=[$27], i_color=[$28], i_product_name=[$29], d_date_sk=[$30], d_year=[$31], $f0=[$32], $f1=[$33], $f2=[$34]) + HiveJoin(condition=[AND(=($15, $0), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + 
HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($5, $22)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) + HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_current_price=[$14], i_color=[$15], i_product_name=[$16], d_date_sk=[$17], d_year=[$18]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + 
HiveJoin(condition=[=($0, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_color=[$17], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject($f0=[$0], $f1=[$2], $f2=[+(+($5, $6), $7)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23], cr_reversed_charge=[$24], cr_store_credit=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) + 
HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject($f1=[$12], $f2=[$10], $f3=[$11], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{9, 11, 16, 17, 18, 19, 25, 26, 27, 28, 30, 31, 48, 51}], agg#0=[count()], agg#1=[sum($45)], agg#2=[sum($46)], agg#3=[sum($47)]) + HiveJoin(condition=[AND(<>($1, $21), =($39, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($36, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], p_promo_sk=[$13], ss_sold_date_sk=[$14], ss_item_sk=[$15], ss_customer_sk=[$16], ss_cdemo_sk=[$17], ss_hdemo_sk=[$18], ss_addr_sk=[$19], ss_store_sk=[$20], ss_promo_sk=[$21], ss_ticket_number=[$22], ss_wholesale_cost=[$23], ss_list_price=[$24], ss_coupon_amt=[$25], i_item_sk=[$26], i_current_price=[$27], i_color=[$28], i_product_name=[$29], d_date_sk=[$30], d_year=[$31], $f0=[$32], $f1=[$33], $f2=[$34]) + HiveJoin(condition=[AND(=($15, $0), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, 
customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($5, $22)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) + HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_current_price=[$14], i_color=[$15], i_product_name=[$16], d_date_sk=[$17], d_year=[$18]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveJoin(condition=[=($0, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], 
ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_color=[$17], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject($f0=[$0], $f1=[$2], $f2=[+(+($5, $6), $7)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23], cr_reversed_charge=[$24], cr_store_credit=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out new file mode 100644 index 0000000000..1b154a434f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out @@ -0,0 +1,99 @@ +PREHOOK: query: explain cbo +select + s_store_name, + 
i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: 
Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(s_store_name=[$11], i_item_desc=[$1], revenue=[$7], i_current_price=[$2], i_wholesale_cost=[$3], i_brand=[$4]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4], i_current_price=[$5], i_wholesale_cost=[$6], i_brand=[$8]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $0), <=($2, *(0.1, $4)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + 
HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out new file mode 100644 index 0000000000..d97f351c4d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out @@ -0,0 +1,508 @@ +PREHOOK: query: explain cbo +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as 
jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * 
ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 
end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@warehouse +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + 
,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then 
ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then 
cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@warehouse +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], 
w_city=[$2], w_county=[$3], w_state=[$4], w_country=[$5], ship_carriers=[CAST(_UTF-16LE'DIAMOND,AIRBORNE'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], year=[CAST(2002):INTEGER], jan_sales=[$6], feb_sales=[$7], mar_sales=[$8], apr_sales=[$9], may_sales=[$10], jun_sales=[$11], jul_sales=[$12], aug_sales=[$13], sep_sales=[$14], oct_sales=[$15], nov_sales=[$16], dec_sales=[$17], jan_sales_per_sq_foot=[$18], feb_sales_per_sq_foot=[$19], mar_sales_per_sq_foot=[$20], apr_sales_per_sq_foot=[$21], may_sales_per_sq_foot=[$22], jun_sales_per_sq_foot=[$23], jul_sales_per_sq_foot=[$24], aug_sales_per_sq_foot=[$25], sep_sales_per_sq_foot=[$26], oct_sales_per_sq_foot=[$27], nov_sales_per_sq_foot=[$28], dec_sales_per_sq_foot=[$29], jan_net=[$30], feb_net=[$31], mar_net=[$32], apr_net=[$33], may_net=[$34], jun_net=[$35], jul_net=[$36], aug_net=[$37], sep_net=[$38], oct_net=[$39], nov_net=[$40], dec_net=[$41]) + HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29], $f30=[$30], $f31=[$31], $f32=[$32], $f33=[$33], $f34=[$34], $f35=[$35], $f36=[$36], $f37=[$37], $f38=[$38], $f39=[$39], $f40=[$40], $f41=[$41]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)], agg#24=[sum($30)], 
agg#25=[sum($31)], agg#26=[sum($32)], agg#27=[sum($33)], agg#28=[sum($34)], agg#29=[sum($35)], agg#30=[sum($36)], agg#31=[sum($37)], agg#32=[sum($38)], agg#33=[sum($39)], agg#34=[sum($40)], agg#35=[sum($41)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f8=[$6], $f9=[$7], $f10=[$8], $f11=[$9], $f12=[$10], $f13=[$11], $f14=[$12], $f15=[$13], $f16=[$14], $f17=[$15], $f18=[$16], $f19=[$17], $f20=[/($6, CAST($1):DECIMAL(10, 0))], $f21=[/($7, CAST($1):DECIMAL(10, 0))], $f22=[/($8, CAST($1):DECIMAL(10, 0))], $f23=[/($9, CAST($1):DECIMAL(10, 0))], $f24=[/($10, CAST($1):DECIMAL(10, 0))], $f25=[/($11, CAST($1):DECIMAL(10, 0))], $f26=[/($12, CAST($1):DECIMAL(10, 0))], $f27=[/($13, CAST($1):DECIMAL(10, 0))], $f28=[/($14, CAST($1):DECIMAL(10, 0))], $f29=[/($15, CAST($1):DECIMAL(10, 0))], $f30=[/($16, CAST($1):DECIMAL(10, 0))], $f31=[/($17, CAST($1):DECIMAL(10, 0))], $f32=[$18], $f33=[$19], $f34=[$20], $f35=[$21], $f36=[$22], $f37=[$23], $f38=[$24], $f39=[$25], $f40=[$26], $f41=[$27], $f42=[$28], $f43=[$29]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[$5], $f5=[$6], $f7=[CASE(=($18, 1), 
*($12, CAST($11):DECIMAL(10, 0)), 0)], $f8=[CASE(=($18, 2), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f9=[CASE(=($18, 3), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f10=[CASE(=($18, 4), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f11=[CASE(=($18, 5), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f12=[CASE(=($18, 6), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f13=[CASE(=($18, 7), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f14=[CASE(=($18, 8), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f15=[CASE(=($18, 9), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f16=[CASE(=($18, 10), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f17=[CASE(=($18, 11), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f18=[CASE(=($18, 12), *($12, CAST($11):DECIMAL(10, 0)), 0)], $f19=[CASE(=($18, 1), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f20=[CASE(=($18, 2), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f21=[CASE(=($18, 3), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f22=[CASE(=($18, 4), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f23=[CASE(=($18, 5), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f24=[CASE(=($18, 6), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f25=[CASE(=($18, 7), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f26=[CASE(=($18, 8), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f27=[CASE(=($18, 9), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f28=[CASE(=($18, 10), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f29=[CASE(=($18, 11), *($13, CAST($11):DECIMAL(10, 0)), 0)], $f30=[CASE(=($18, 12), *($13, CAST($11):DECIMAL(10, 0)), 0)]) + HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], ws_quantity=[$18], ws_sales_price=[$21], ws_net_paid_inc_tax=[$30]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($14))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(t_time_sk=[$0], t_time=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $2, 49530, 78330), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sm_ship_mode_sk=[$0], sm_carrier=[$4]) + HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) + HiveProject($f0=[$15], $f1=[$16], $f2=[$17], $f3=[$18], $f4=[$19], $f5=[$20], $f7=[CASE(=($11, 1), *($5, 
CAST($4):DECIMAL(10, 0)), 0)], $f8=[CASE(=($11, 2), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f9=[CASE(=($11, 3), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f10=[CASE(=($11, 4), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f11=[CASE(=($11, 5), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f12=[CASE(=($11, 6), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f13=[CASE(=($11, 7), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f14=[CASE(=($11, 8), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f15=[CASE(=($11, 9), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f16=[CASE(=($11, 10), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f17=[CASE(=($11, 11), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f18=[CASE(=($11, 12), *($5, CAST($4):DECIMAL(10, 0)), 0)], $f19=[CASE(=($11, 1), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f20=[CASE(=($11, 2), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f21=[CASE(=($11, 3), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f22=[CASE(=($11, 4), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f23=[CASE(=($11, 5), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f24=[CASE(=($11, 6), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f25=[CASE(=($11, 7), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f26=[CASE(=($11, 8), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f27=[CASE(=($11, 9), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f28=[CASE(=($11, 10), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f29=[CASE(=($11, 11), *($6, CAST($4):DECIMAL(10, 0)), 0)], $f30=[CASE(=($11, 12), *($6, CAST($4):DECIMAL(10, 0)), 0)]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14], cs_quantity=[$18], cs_ext_sales_price=[$23], cs_net_paid_inc_ship_tax=[$32]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT 
NULL($0), IS NOT NULL($1), IS NOT NULL($13))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(t_time_sk=[$0], t_time=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $2, 49530, 78330), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sm_ship_mode_sk=[$0], sm_carrier=[$4]) + HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query67.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query67.q.out new file mode 100644 index 0000000000..fbe6779bc0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query67.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: explain cbo +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by 
i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], fetch=[100]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], i_product_name=[$3], d_year=[$4], d_qoy=[$5], d_moy=[$6], s_store_id=[$7], sumsales=[$8], rank_window_0=[$9]) + HiveFilter(condition=[<=($9, 100)]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], i_product_name=[$3], d_year=[$4], d_qoy=[$5], d_moy=[$6], s_store_id=[$7], sumsales=[$8], 
rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $8 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7}], groups=[[{0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4}, {0, 1, 2, 3}, {0, 1, 2}, {0, 1}, {0}, {}]], agg#0=[sum($8)]) + HiveProject($f0=[$3], $f1=[$2], $f2=[$1], $f3=[$4], $f4=[$12], $f5=[$14], $f6=[$13], $f7=[$16], $f8=[CASE(AND(IS NOT NULL($9), IS NOT NULL($8)), *($9, CAST($8):DECIMAL(10, 0)), 0)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3], d_year=[$6], d_moy=[$8], d_qoy=[$10]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out new file mode 100644 index 0000000000..cd71cda18d --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + 
,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(c_last_name=[$3], c_first_name=[$2], ca_city=[$5], bought_city=[$8], ss_ticket_number=[$6], extended_price=[$9], extended_tax=[$11], list_price=[$10]) + HiveJoin(condition=[AND(<>($5, $8), =($7, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[current_addr]) + HiveProject(ss_ticket_number=[$3], 
ss_customer_sk=[$1], bought_city=[$0], extended_price=[$4], list_price=[$5], extended_tax=[$6]) + HiveAggregate(group=[{1, 3, 5, 7}], agg#0=[sum($8)], agg#1=[sum($9)], agg#2=[sum($10)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($2, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dom=[$9]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), BETWEEN(false, $9, 1, 2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_city=[$22]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out new file mode 100644 index 0000000000..9089fc8e33 --- 
/dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from 
store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(cd_gender=[$0], cd_marital_status=[$1], cd_education_status=[$2], cnt1=[$5], cd_purchase_estimate=[$3], cnt2=[$5], cd_credit_rating=[$4], cnt3=[$5]) + HiveAggregate(group=[{6, 7, 8, 9, 10}], agg#0=[count()]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($0, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], ws_bill_customer_sk0=[$11], $f1=[$12]) + HiveFilter(condition=[IS NULL($12)]) + HiveJoin(condition=[=($0, $11)], joinType=[left], 
algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $11)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'CO', _UTF-16LE'IL', _UTF-16LE'MN'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], 
d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out new file mode 100644 index 0000000000..29415ca75f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain cbo +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) 
agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[/(CAST($1):DOUBLE, $2)], $f2=[/($3, $4)], $f3=[/($5, $6)], $f4=[/($7, $8)]) + HiveAggregate(group=[{1}], agg#0=[sum($6)], agg#1=[count($6)], agg#2=[sum($7)], agg#3=[count($7)], agg#4=[sum($9)], agg#5=[count($9)], agg#6=[sum($8)], agg#7=[count($8)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($8))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0], cd_gender=[CAST(_UTF-16LE'F'):VARCHAR(2147483647) CHARACTER 
SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_marital_status=[CAST(_UTF-16LE'W'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], cd_education_status=[CAST(_UTF-16LE'Primary'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'F'), =($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0], p_channel_email=[$9], p_channel_event=[$14]) + HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out new file mode 100644 index 0000000000..aa04df83fa --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and 
s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], s_state=[$0], s_county=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 
0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col s_state))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$2]) + HiveJoin(condition=[=($7, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($24), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_state=[$0]) + HiveFilter(condition=[<=($1, 5)]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[$1]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $1 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], window_col_0=[$1]) + HiveProject(s_state=[$0], $f1=[$1]) + HiveAggregate(group=[{6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], 
ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out new file mode 100644 index 0000000000..4e52893255 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: 
default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4]) + HiveSortLimit(sort0=[$4], sort1=[$5], dir0=[DESC-nulls-last], dir1=[ASC]) + HiveProject(brand_id=[$2], brand=[$3], t_hour=[$0], t_minute=[$1], ext_price=[$4], (tok_table_or_col i_brand_id)=[$2]) + HiveAggregate(group=[{1, 2, 8, 9}], agg#0=[sum($4)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], 
cost=[not available]) + HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4], t_meal_time=[$9]) + HiveFilter(condition=[AND(IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($1))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15), IS NOT NULL($1))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + 
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manager_id=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out new file mode 100644 index 0000000000..fca31efa44 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by 
i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics 
+POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) + HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 0)], $f4=[CASE(IS NOT NULL($28), 1, 0)]) + HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], d_date0=[$7], p_promo_sk=[$26]) + HiveJoin(condition=[AND(=($0, $27), =($20, $28))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], p_promo_sk=[$20]) + HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, +(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($5, $16)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_week_seq=[$4], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + 
HiveProject(cd_demo_sk=[$0], cd_marital_status=[CAST(_UTF-16LE'M'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[CAST(_UTF-16LE'1001-5000'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1001-5000'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(p_promo_sk=[$0]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out new file mode 100644 index 0000000000..d28a896fd7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out @@ -0,0 +1,93 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and 
household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: 
Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) + HiveProject(c_last_name=[$3], c_first_name=[$2], c_salutation=[$1], c_preferred_cust_flag=[$4], ss_ticket_number=[$5], cnt=[$7]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveFilter(condition=[BETWEEN(false, $2, 1, 5)]) + HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[count()]) + HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dom=[$9]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), BETWEEN(false, $9, 1, 2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[$2], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), null), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + 
HiveProject(s_store_sk=[$0], s_county=[$23]) + HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out new file mode 100644 index 0000000000..32d6e03706 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out @@ -0,0 +1,191 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and 
t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and 
t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) + HiveJoin(condition=[AND(=($0, $6), CASE(CAST(IS NOT NULL($7)):BOOLEAN, CASE(CAST(IS NOT NULL($9)):BOOLEAN, >(/($5, $9), /($3, $7)), >(null, /($3, $7))), CASE(CAST(IS NOT NULL($9)):BOOLEAN, >(/($5, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], year_total=[$4]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002), IS 
NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveFilter(condition=[>($4, 0)]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], 
d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveFilter(condition=[>($4, 0)]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out new file mode 100644 index 0000000000..3d87d1b088 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON 
(cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns 
+PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS 
sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], sales_amt_diff=[$7]) + HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$10], sales_cnt_diff=[-($10, $4)], sales_amt_diff=[-($11, $5)]) + HiveJoin(condition=[AND(AND(AND(AND(=($6, $0), =($7, $1)), =($8, $2)), =($9, $3)), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) 
+ HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], 
i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, web_returns]], 
table:alias=[web_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS 
NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + 
HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], 
table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out new file mode 100644 index 0000000000..74f888cffa --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out @@ -0,0 +1,101 @@ +PREHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 2, 3, 4}], agg#0=[count()], agg#1=[sum($5)]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], ext_sales_price=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store'], 
col_name=[_UTF-16LE'ss_addr_sk'], d_year=[$1], d_qoy=[$2], i_category=[$4], ext_sales_price=[$8]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[null], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NULL($6), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(channel=[_UTF-16LE'web'], col_name=[_UTF-16LE'ws_web_page_sk'], d_year=[$7], d_qoy=[$8], i_category=[$5], ext_sales_price=[$3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_page_sk=[null], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NULL($12), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], col_name=[_UTF-16LE'cs_warehouse_sk'], d_year=[$7], d_qoy=[$8], i_category=[$5], ext_sales_price=[$3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[null], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NULL($14), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out new file mode 100644 index 0000000000..2c42995747 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out @@ -0,0 +1,316 @@ +Warning: Shuffle Join MERGEJOIN[317][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' 
as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: 
Input: default@store_sales +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and 
(cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{6}], 
agg#0=[sum($2)], agg#1=[sum($3)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$7], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[$0], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_call_center_sk=[$0], $f1=[$1], 
$f2=[$2]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], 
ws_web_page_sk=[$12], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wp_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$11], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out new file mode 100644 index 0000000000..66b345ce68 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query78.q.out @@ -0,0 +1,183 @@ +PREHOOK: query: explain cbo +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on 
ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: 
default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where 
coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[$1], ratio=[$2], store_qty=[$3], store_wholesale_cost=[$4], store_sales_price=[$5], other_chan_qty=[$6], other_chan_wholesale_cost=[$7], other_chan_sales_price=[$8]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$9], sort3=[$10], sort4=[$11], sort5=[$6], sort6=[$7], sort7=[$8], sort8=[$12], dir0=[ASC], dir1=[ASC], dir2=[DESC-nulls-last], dir3=[DESC-nulls-last], dir4=[DESC-nulls-last], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ratio=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)], store_qty=[$2], store_wholesale_cost=[$3], store_sales_price=[$4], other_chan_qty=[+(CASE(IS NOT NULL($7), $7, 0), CASE(IS NOT NULL($11), $11, 0))], other_chan_wholesale_cost=[+(CASE(IS NOT NULL($8), $8, 0), CASE(IS NOT NULL($12), $12, 0))], other_chan_sales_price=[+(CASE(IS NOT NULL($9), $9, 0), CASE(IS NOT NULL($13), $13, 0))], ss_qty=[$2], ss_wc=[$3], ss_sp=[$4], (tok_function round (/ (tok_table_or_col ss_qty) (tok_function coalesce (+ (tok_table_or_col ws_qty) (tok_table_or_col cs_qty)) 1)) 2)=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)]) + HiveFilter(condition=[CASE(IS NOT 
NULL($11), >($11, 0), false)]) + HiveJoin(condition=[=($10, $1)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4], ws_item_sk=[$5], ws_bill_customer_sk=[$6], $f20=[$7], $f30=[$8], $f40=[$9]) + HiveFilter(condition=[CASE(IS NOT NULL($7), >($7, 0), false)]) + HiveJoin(condition=[AND(=($5, $0), =($6, $1))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{3, 4}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_ticket_number=[$3], ss_quantity=[$4], ss_wholesale_cost=[$5], ss_sales_price=[$6], sr_item_sk=[$7], sr_ticket_number=[$8]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($1, $7))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_wholesale_cost=[$11], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{3, 4}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + 
HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_order_number=[$3], ws_quantity=[$4], ws_wholesale_cost=[$5], ws_sales_price=[$6], wr_item_sk=[$7], wr_order_number=[$8]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($1, $7))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_order_number=[$17], ws_quantity=[$18], ws_wholesale_cost=[$19], ws_sales_price=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject($f2=[$0], $f3=[$2], $f4=[$3], $f5=[$4]) + HiveAggregate(group=[{3, 4}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_order_number=[$3], cs_quantity=[$4], cs_wholesale_cost=[$5], cs_sales_price=[$6], cr_item_sk=[$7], cr_order_number=[$8]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($2, $7))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_wholesale_cost=[$19], cs_sales_price=[$21]) + HiveFilter(condition=[AND(=($15, $15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], 
cr_order_number=[$16]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query79.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query79.q.out new file mode 100644 index 0000000000..6da8ac2300 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query79.q.out @@ -0,0 +1,82 @@ +PREHOOK: query: explain cbo +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and 
(household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(c_last_name=[$0], c_first_name=[$1], _o__c2=[$2], ss_ticket_number=[$3], amt=[$4], profit=[$5]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$6], sort3=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[substr($5, 1, 30)], ss_ticket_number=[$3], amt=[$6], profit=[$7], (tok_function substr (tok_table_or_col s_city) 1 30)=[substr($5, 1, 30)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], s_city=[$3], amt=[$4], profit=[$5]) + HiveAggregate(group=[{1, 3, 5, 13}], agg#0=[sum($6)], agg#1=[sum($7)]) + HiveJoin(condition=[=($2, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) 
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dow=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), =($7, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_number_employees=[$6], s_city=[$22]) + HiveFilter(condition=[AND(BETWEEN(false, $6, 200, 295), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(OR(=($3, 8), >($4, 0)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out new file mode 100644 index 0000000000..5c4f7a26ff --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out @@ -0,0 +1,266 @@ +PREHOOK: query: explain cbo +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + 
'54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + 
'13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + 
'17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + 
'11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: 
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(s_store_name=[$0], $f1=[$1]) + HiveAggregate(group=[{8}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER], d_qoy=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], s_store_sk=[$1], s_store_name=[$2], s_zip=[$3]) + HiveJoin(condition=[=(substr($3, 1, 2), substr($0, 1, 2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveFilter(condition=[=($1, 2)]) + HiveAggregate(group=[{0}], agg#0=[count($1)]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(ca_zip=[substr($9, 1, 5)]) + HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436', _UTF-16LE'30868', _UTF-16LE'65085', _UTF-16LE'22977', _UTF-16LE'83927', _UTF-16LE'77557', _UTF-16LE'58429', _UTF-16LE'40697', _UTF-16LE'80614', _UTF-16LE'10502', _UTF-16LE'32779', _UTF-16LE'91137', _UTF-16LE'61265', _UTF-16LE'98294', _UTF-16LE'17921', _UTF-16LE'18427', _UTF-16LE'21203', _UTF-16LE'59362', _UTF-16LE'87291', _UTF-16LE'84093', _UTF-16LE'21505', _UTF-16LE'17184', _UTF-16LE'10866', _UTF-16LE'67898', _UTF-16LE'25797', _UTF-16LE'28055', _UTF-16LE'18377', _UTF-16LE'80332', _UTF-16LE'74535', _UTF-16LE'21757', _UTF-16LE'29742', _UTF-16LE'90885', _UTF-16LE'29898', _UTF-16LE'17819', _UTF-16LE'40811', _UTF-16LE'25990', _UTF-16LE'47513', _UTF-16LE'89531', _UTF-16LE'91068', 
_UTF-16LE'10391', _UTF-16LE'18846', _UTF-16LE'99223', _UTF-16LE'82637', _UTF-16LE'41368', _UTF-16LE'83658', _UTF-16LE'86199', _UTF-16LE'81625', _UTF-16LE'26696', _UTF-16LE'89338', _UTF-16LE'88425', _UTF-16LE'32200', _UTF-16LE'81427', _UTF-16LE'19053', _UTF-16LE'77471', _UTF-16LE'36610', _UTF-16LE'99823', _UTF-16LE'43276', _UTF-16LE'41249', _UTF-16LE'48584', _UTF-16LE'83550', _UTF-16LE'82276', _UTF-16LE'18842', _UTF-16LE'78890', _UTF-16LE'14090', _UTF-16LE'38123', _UTF-16LE'40936', _UTF-16LE'34425', _UTF-16LE'19850', _UTF-16LE'43286', _UTF-16LE'80072', _UTF-16LE'79188', _UTF-16LE'54191', _UTF-16LE'11395', _UTF-16LE'50497', _UTF-16LE'84861', _UTF-16LE'90733', _UTF-16LE'21068', _UTF-16LE'57666', _UTF-16LE'37119', _UTF-16LE'25004', _UTF-16LE'57835', _UTF-16LE'70067', _UTF-16LE'62878', _UTF-16LE'95806', _UTF-16LE'19303', _UTF-16LE'18840', _UTF-16LE'19124', _UTF-16LE'29785', _UTF-16LE'16737', _UTF-16LE'16022', _UTF-16LE'49613', _UTF-16LE'89977', _UTF-16LE'68310', _UTF-16LE'60069', _UTF-16LE'98360', _UTF-16LE'48649', _UTF-16LE'39050', _UTF-16LE'41793', _UTF-16LE'25002', _UTF-16LE'27413', _UTF-16LE'39736', _UTF-16LE'47208', _UTF-16LE'16515', _UTF-16LE'94808', _UTF-16LE'57648', _UTF-16LE'15009', _UTF-16LE'80015', _UTF-16LE'42961', _UTF-16LE'63982', _UTF-16LE'21744', _UTF-16LE'71853', _UTF-16LE'81087', _UTF-16LE'67468', _UTF-16LE'34175', _UTF-16LE'64008', _UTF-16LE'20261', _UTF-16LE'11201', _UTF-16LE'51799', _UTF-16LE'48043', _UTF-16LE'45645', _UTF-16LE'61163', _UTF-16LE'48375', _UTF-16LE'36447', _UTF-16LE'57042', _UTF-16LE'21218', _UTF-16LE'41100', _UTF-16LE'89951', _UTF-16LE'22745', _UTF-16LE'35851', _UTF-16LE'83326', _UTF-16LE'61125', _UTF-16LE'78298', _UTF-16LE'80752', _UTF-16LE'49858', _UTF-16LE'52940', _UTF-16LE'96976', _UTF-16LE'63792', _UTF-16LE'11376', _UTF-16LE'53582', _UTF-16LE'18717', _UTF-16LE'90226', _UTF-16LE'50530', _UTF-16LE'94203', _UTF-16LE'99447', _UTF-16LE'27670', _UTF-16LE'96577', _UTF-16LE'57856', _UTF-16LE'56372', _UTF-16LE'16165', _UTF-16LE'23427', 
_UTF-16LE'54561', _UTF-16LE'28806', _UTF-16LE'44439', _UTF-16LE'22926', _UTF-16LE'30123', _UTF-16LE'61451', _UTF-16LE'92397', _UTF-16LE'56979', _UTF-16LE'92309', _UTF-16LE'70873', _UTF-16LE'13355', _UTF-16LE'21801', _UTF-16LE'46346', _UTF-16LE'37562', _UTF-16LE'56458', _UTF-16LE'28286', _UTF-16LE'47306', _UTF-16LE'99555', _UTF-16LE'69399', _UTF-16LE'26234', _UTF-16LE'47546', _UTF-16LE'49661', _UTF-16LE'88601', _UTF-16LE'35943', _UTF-16LE'39936', _UTF-16LE'25632', _UTF-16LE'24611', _UTF-16LE'44166', _UTF-16LE'56648', _UTF-16LE'30379', _UTF-16LE'59785', _UTF-16LE'11110', _UTF-16LE'14329', _UTF-16LE'93815', _UTF-16LE'52226', _UTF-16LE'71381', _UTF-16LE'13842', _UTF-16LE'25612', _UTF-16LE'63294', _UTF-16LE'14664', _UTF-16LE'21077', _UTF-16LE'82626', _UTF-16LE'18799', _UTF-16LE'60915', _UTF-16LE'81020', _UTF-16LE'56447', _UTF-16LE'76619', _UTF-16LE'11433', _UTF-16LE'13414', _UTF-16LE'42548', _UTF-16LE'92713', _UTF-16LE'70467', _UTF-16LE'30884', _UTF-16LE'47484', _UTF-16LE'16072', _UTF-16LE'38936', _UTF-16LE'13036', _UTF-16LE'88376', _UTF-16LE'45539', _UTF-16LE'35901', _UTF-16LE'19506', _UTF-16LE'65690', _UTF-16LE'73957', _UTF-16LE'71850', _UTF-16LE'49231', _UTF-16LE'14276', _UTF-16LE'20005', _UTF-16LE'18384', _UTF-16LE'76615', _UTF-16LE'11635', _UTF-16LE'38177', _UTF-16LE'55607', _UTF-16LE'41369', _UTF-16LE'95447', _UTF-16LE'58581', _UTF-16LE'58149', _UTF-16LE'91946', _UTF-16LE'33790', _UTF-16LE'76232', _UTF-16LE'75692', _UTF-16LE'95464', _UTF-16LE'22246', _UTF-16LE'51061', _UTF-16LE'56692', _UTF-16LE'53121', _UTF-16LE'77209', _UTF-16LE'15482', _UTF-16LE'10688', _UTF-16LE'14868', _UTF-16LE'45907', _UTF-16LE'73520', _UTF-16LE'72666', _UTF-16LE'25734', _UTF-16LE'17959', _UTF-16LE'24677', _UTF-16LE'66446', _UTF-16LE'94627', _UTF-16LE'53535', _UTF-16LE'15560', _UTF-16LE'41967', _UTF-16LE'69297', _UTF-16LE'11929', _UTF-16LE'59403', _UTF-16LE'33283', _UTF-16LE'52232', _UTF-16LE'57350', _UTF-16LE'43933', _UTF-16LE'40921', _UTF-16LE'36635', _UTF-16LE'10827', _UTF-16LE'71286', 
_UTF-16LE'19736', _UTF-16LE'80619', _UTF-16LE'25251', _UTF-16LE'95042', _UTF-16LE'15526', _UTF-16LE'36496', _UTF-16LE'55854', _UTF-16LE'49124', _UTF-16LE'81980', _UTF-16LE'35375', _UTF-16LE'49157', _UTF-16LE'63512', _UTF-16LE'28944', _UTF-16LE'14946', _UTF-16LE'36503', _UTF-16LE'54010', _UTF-16LE'18767', _UTF-16LE'23969', _UTF-16LE'43905', _UTF-16LE'66979', _UTF-16LE'33113', _UTF-16LE'21286', _UTF-16LE'58471', _UTF-16LE'59080', _UTF-16LE'13395', _UTF-16LE'79144', _UTF-16LE'70373', _UTF-16LE'67031', _UTF-16LE'38360', _UTF-16LE'26705', _UTF-16LE'50906', _UTF-16LE'52406', _UTF-16LE'26066', _UTF-16LE'73146', _UTF-16LE'15884', _UTF-16LE'31897', _UTF-16LE'30045', _UTF-16LE'61068', _UTF-16LE'45550', _UTF-16LE'92454', _UTF-16LE'13376', _UTF-16LE'14354', _UTF-16LE'19770', _UTF-16LE'22928', _UTF-16LE'97790', _UTF-16LE'50723', _UTF-16LE'46081', _UTF-16LE'30202', _UTF-16LE'14410', _UTF-16LE'20223', _UTF-16LE'88500', _UTF-16LE'67298', _UTF-16LE'13261', _UTF-16LE'14172', _UTF-16LE'81410', _UTF-16LE'93578', _UTF-16LE'83583', _UTF-16LE'46047', _UTF-16LE'94167', _UTF-16LE'82564', _UTF-16LE'21156', _UTF-16LE'15799', _UTF-16LE'86709', _UTF-16LE'37931', _UTF-16LE'74703', _UTF-16LE'83103', _UTF-16LE'23054', _UTF-16LE'70470', _UTF-16LE'72008', _UTF-16LE'49247', _UTF-16LE'91911', _UTF-16LE'69998', _UTF-16LE'20961', _UTF-16LE'70070', _UTF-16LE'63197', _UTF-16LE'54853', _UTF-16LE'88191', _UTF-16LE'91830', _UTF-16LE'49521', _UTF-16LE'19454', _UTF-16LE'81450', _UTF-16LE'89091', _UTF-16LE'62378', _UTF-16LE'25683', _UTF-16LE'61869', _UTF-16LE'51744', _UTF-16LE'36580', _UTF-16LE'85778', _UTF-16LE'36871', _UTF-16LE'48121', _UTF-16LE'28810', _UTF-16LE'83712', _UTF-16LE'45486', _UTF-16LE'67393', _UTF-16LE'26935', _UTF-16LE'42393', _UTF-16LE'20132', _UTF-16LE'55349', _UTF-16LE'86057', _UTF-16LE'21309', _UTF-16LE'80218', _UTF-16LE'10094', _UTF-16LE'11357', _UTF-16LE'48819', _UTF-16LE'39734', _UTF-16LE'40758', _UTF-16LE'30432', _UTF-16LE'21204', _UTF-16LE'29467', _UTF-16LE'30214', _UTF-16LE'61024', 
_UTF-16LE'55307', _UTF-16LE'74621', _UTF-16LE'11622', _UTF-16LE'68908', _UTF-16LE'33032', _UTF-16LE'52868', _UTF-16LE'99194', _UTF-16LE'99900', _UTF-16LE'84936', _UTF-16LE'69036', _UTF-16LE'99149', _UTF-16LE'45013', _UTF-16LE'32895', _UTF-16LE'59004', _UTF-16LE'32322', _UTF-16LE'14933', _UTF-16LE'32936', _UTF-16LE'33562', _UTF-16LE'72550', _UTF-16LE'27385', _UTF-16LE'58049', _UTF-16LE'58200', _UTF-16LE'16808', _UTF-16LE'21360', _UTF-16LE'32961', _UTF-16LE'18586', _UTF-16LE'79307', _UTF-16LE'15492'), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(ca_zip=[substr($0, 1, 5)]) + HiveFilter(condition=[>($1, 10)]) + HiveAggregate(group=[{1}], agg#0=[count()]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(c_current_addr_sk=[$4], c_preferred_cust_flag=[CAST(_UTF-16LE'Y'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($10, _UTF-16LE'Y'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL(substr($25, 1, 2)))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out new file mode 100644 index 0000000000..9c4329473d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: explain cbo +with ssr as + 
(select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as 
sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = 
cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: 
default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[||(_UTF-16LE'store', $0)], sales=[$1], returns=[$2], profit=[$3]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) + HiveProject($f0=[$1], $f1=[$9], $f2=[CASE(IS NOT NULL($13), $13, 0)], $f3=[-($10, CASE(IS NOT NULL($14), $14, 0))]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($5, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_current_price=[$5]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($8))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], 
sr_net_loss=[$19]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0], p_channel_tv=[CAST(_UTF-16LE'N'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$2], profit=[$3]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) + HiveProject($f0=[$1], $f1=[$9], $f2=[CASE(IS NOT NULL($13), $13, 0)], $f3=[-($10, CASE(IS NOT NULL($14), $14, 0))]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) + HiveJoin(condition=[=($5, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_current_price=[$5]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + 
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($15), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0], p_channel_tv=[CAST(_UTF-16LE'N'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$2], profit=[$3]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) + HiveProject($f0=[$18], $f1=[$9], $f2=[CASE(IS NOT NULL($13), $13, 0)], $f3=[-($10, CASE(IS NOT NULL($14), $14, 0))]) + HiveJoin(condition=[=($6, $17)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0], p_channel_tv=[CAST(_UTF-16LE'N'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_current_price=[$5]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + 
HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($13), IS NOT NULL($3), IS NOT NULL($16))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out new file mode 100644 index 0000000000..e13017f61d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out @@ -0,0 +1,113 @@ +PREHOOK: query: explain cbo +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + 
,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + 
,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_name=[$3], ca_street_number=[$4], ca_street_name=[$5], ca_street_type=[$6], ca_suite_number=[$7], ca_city=[$8], ca_county=[$9], ca_state=[CAST(_UTF-16LE'IL'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ca_zip=[$10], ca_country=[$11], ca_gmt_offset=[$12], ca_location_type=[$13], ctr_total_return=[$14]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], sort11=[$11], sort12=[$12], sort13=[$13], sort14=[$14], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], dir10=[ASC], dir11=[ASC], dir12=[ASC], dir13=[ASC], dir14=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1], c_salutation=[$3], c_first_name=[$4], c_last_name=[$5], ca_street_number=[$7], ca_street_name=[$8], ca_street_type=[$9], ca_suite_number=[$10], ca_city=[$11], ca_county=[$12], ca_zip=[$14], ca_country=[$15], ca_gmt_offset=[$16], ca_location_type=[$17], ctr_total_return=[$20]) + HiveJoin(condition=[=($18, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_current_addr_sk=[$4], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], 
ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_state=[CAST(_UTF-16LE'IL'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'IL'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10), IS NOT NULL($7))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_state=[$0]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ca_state=[$0], cr_returning_customer_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query82.q.out new file mode 100644 index 0000000000..08d9992997 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query82.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and 
d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) + HiveAggregate(group=[{2, 3, 4}]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IN($13, 437, 129, 727, 663), BETWEEN(false, $5, 30, 60), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$2], d_date_sk=[$3], d_date=[$4]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 100, 500), IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00, 2002-07-29 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out new file mode 100644 index 0000000000..d5a3d667ea --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: 
default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_id=[$0], sr_item_qty=[$3], sr_dev=[*(/(/(CAST($3):DOUBLE, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], cr_item_qty=[$1], cr_dev=[*(/(/(CAST($1):DOUBLE, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], wr_item_qty=[$5], wr_dev=[*(/(/(CAST($5):DOUBLE, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], average=[/(CAST(+(+($3, $1), $5)):DECIMAL(19, 0), 3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_item_sk=[$2], cr_return_quantity=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + 
HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + 
HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_return_quantity=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query84.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query84.q.out new file mode 100644 index 0000000000..de765ab836 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query84.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + 
limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(customer_id=[$0], customername=[$1]) + HiveSortLimit(sort0=[$2], dir0=[ASC], fetch=[100]) + HiveProject(customer_id=[$2], customername=[||(||($7, _UTF-16LE', '), $6)], c_customer_id=[$2]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_cdemo_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(c_customer_id=[$0], c_current_cdemo_sk=[$1], 
c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], c_first_name=[$4], c_last_name=[$5], ca_address_sk=[$6], ca_city=[$7], hd_demo_sk=[$8], hd_income_band_sk=[$9], ib_income_band_sk=[$10], ib_lower_bound=[$11], ib_upper_bound=[$12]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$1], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_city=[CAST(_UTF-16LE'Hopewell'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($6, _UTF-16LE'Hopewell'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2], ib_lower_bound=[$3], ib_upper_bound=[$4]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ib_income_band_sk=[$0], ib_lower_bound=[$1], ib_upper_bound=[$2]) + HiveFilter(condition=[AND(>=($1, 32287), <=($2, 82287), IS NOT NULL($0))]) + HiveTableScan(table=[[default, income_band]], table:alias=[income_band]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out new file mode 100644 index 0000000000..50474bc269 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain 
cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: 
default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + 
,avg(wr_fee) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) + HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) + HiveAggregate(group=[{7}], agg#0=[sum($26)], agg#1=[count($26)], agg#2=[sum($21)], agg#3=[count($21)], agg#4=[sum($20)], agg#5=[count($20)]) + HiveJoin(condition=[AND(AND(=($0, $17), =($4, $1)), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $24, 100, 150)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $24, 50, 100)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $24, 150, 200))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], 
cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveJoin(condition=[AND(=($0, $5), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 50, 250))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[AND(=($9, $0), =($11, $5))], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($13), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_page_sk=[$12], ws_order_number=[$17], ws_quantity=[$18], ws_sales_price=[$21], ws_net_profit=[$33]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $21, 100, 150), BETWEEN(false, $21, 50, 100), BETWEEN(false, $21, 150, 200)), OR(BETWEEN(false, $33, 100, 200), BETWEEN(false, $33, 150, 300), BETWEEN(false, $33, 50, 250)), IS NOT NULL($3), IS NOT NULL($17), IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query86.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query86.q.out new file mode 100644 index 0000000000..0832a6360a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query86.q.out @@ -0,0 +1,77 @@ +PREHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales 
+PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], i_category=[$0], i_class=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col i_category))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$1], $f2=[$5]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out new file mode 100644 index 0000000000..35f319d953 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out @@ -0,0 +1,114 @@ +PREHOOK: query: explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from ((select distinct 
c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + 
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + 
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out new file mode 100644 index 0000000000..1f86e3ab46 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out @@ -0,0 +1,347 @@ +Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross 
product +PREHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = 
time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and 
((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and 
ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and 
household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + 
(household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[$0], $f00=[$7], $f01=[$6], $f02=[$5], $f03=[$4], $f04=[$3], $f05=[$2], $f06=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + 
HiveProject(t_time_sk=[$0], t_hour=[CAST(8):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(12):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 12), <($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(11):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 11), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), 
OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(11):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 11), <($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(10):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 10), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, 
_UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(10):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 10), <($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(9):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 9), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(9):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 9), <($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], 
table:alias=[time_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out new file mode 100644 index 0000000000..72f22b64c5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, 
store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)]) + HiveFilter(condition=[CASE(<>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1), null)]) + HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + 
HiveAggregate(group=[{5, 6, 7, 10, 12, 13}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical', _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'), IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics', _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical')), AND(IN($12, _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), IN($10, _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[$8]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out new file mode 100644 index 0000000000..3ec1916262 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out @@ -0,0 +1,200 @@ +Warning: Shuffle Join 
MERGEJOIN[171][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[172][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[173][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[174][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, 
$hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14, $hdt$_15]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 
and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: 
hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(bucket1=[CASE(>($1, 409437), $2, $3)], bucket2=[CASE(>($4, 4595804), $5, $6)], bucket3=[CASE(>($7, 7887297), $8, $9)], bucket4=[CASE(>($10, 10872978), $11, $12)], bucket5=[CASE(>($13, 43571537), $14, $15)]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[CAST(1):INTEGER]) + HiveFilter(condition=[=($0, 1)]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + 
HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) 
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out new file mode 100644 index 0000000000..c1567c7f61 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out @@ -0,0 +1,92 @@ +Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = 
household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(am_pm_ratio=[/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4))]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wp_web_page_sk=[$0], wp_char_count=[$10]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 5000, 5200), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveProject(t_time_sk=[$0], t_hour=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 6, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[CAST(8):INTEGER]) + HiveFilter(condition=[AND(=($3, 8), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wp_web_page_sk=[$0], wp_char_count=[$10]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 5000, 5200), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveProject(t_time_sk=[$0], t_hour=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 14, 15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + 
HiveProject(hd_demo_sk=[$0], hd_dep_count=[CAST(8):INTEGER]) + HiveFilter(condition=[AND(=($3, 8), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out new file mode 100644 index 0000000000..6b58ccc61e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out @@ -0,0 +1,110 @@ +PREHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, 
+ household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss=[$3]) + HiveSortLimit(sort0=[$4], dir0=[DESC-nulls-last]) + HiveProject(call_center=[$2], call_center_name=[$3], manager=[$4], returns_loss=[$5], (tok_function sum (tok_table_or_col cr_net_loss))=[$5]) + HiveAggregate(group=[{7, 8, 17, 18, 19}], agg#0=[sum($12)]) + HiveJoin(condition=[=($20, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-7):DECIMAL(5, 2)]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], 
c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6], cc_call_center_sk=[$7], cc_call_center_id=[$8], cc_name=[$9], cc_manager=[$10]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_call_center_sk=[$11], cr_net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(11):INTEGER]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_call_center_id=[$1], cc_name=[$6], cc_manager=[$11]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[$2]) + HiveFilter(condition=[AND(LIKE($2, _UTF-16LE'0-500%'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + 
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query92.q.out new file mode 100644 index 0000000000..5a0e1da525 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query92.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(excess discount amount=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(excess discount amount=[$0], (tok_function sum 
(tok_table_or_col ws_ext_discount_amt))=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[AND(>($2, CAST(*(1.3, $6)):DECIMAL(14, 7)), =($7, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_item_sk=[$0], $f1=[$1], i_item_sk=[$2], i_manufact_id=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_manufact_id=[CAST(269):INTEGER]) + HiveFilter(condition=[AND(=($13, 269), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out new file mode 100644 index 0000000000..60b7557d4d --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) + HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], 
cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[CAST(_UTF-16LE'Did not like the warranty'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out new file mode 100644 index 0000000000..4f97a67466 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by 
count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], 
ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13), IS NOT NULL($17))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($15))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(wr_order_number0=[$0], $f1=[true]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, 
web_returns]], table:alias=[wr1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out new file mode 100644 index 0000000000..c248890515 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + 
,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $7)], agg#1=[sum($8)], agg#2=[sum($9)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$0]) + HiveAggregate(group=[{14}]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(wr_returned_date_sk=[$0], wr_returned_time_sk=[$1], wr_item_sk=[$2], wr_refunded_customer_sk=[$3], 
wr_refunded_cdemo_sk=[$4], wr_refunded_hdemo_sk=[$5], wr_refunded_addr_sk=[$6], wr_returning_customer_sk=[$7], wr_returning_cdemo_sk=[$8], wr_returning_hdemo_sk=[$9], wr_returning_addr_sk=[$10], wr_web_page_sk=[$11], wr_reason_sk=[$12], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15], wr_return_tax=[$16], wr_return_amt_inc_tax=[$17], wr_fee=[$18], wr_return_ship_cost=[$19], wr_refunded_cash=[$20], wr_reversed_charge=[$21], wr_account_credit=[$22], wr_net_loss=[$23], BLOCK__OFFSET__INSIDE__FILE=[$24], INPUT__FILE__NAME=[$25], ROW__ID=[$26]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], 
ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out new file mode 100644 index 0000000000..6367e9899c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 
'ese' +order by count(*) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], (tok_functionstar count)=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(8):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[CAST(5):INTEGER]) + HiveFilter(condition=[AND(=($3, 5), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out new file mode 100644 index 0000000000..5cd85827db --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +with ssci as ( 
+select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[sum($1)], agg#2=[sum($2)]) + HiveProject($f0=[CASE(AND(IS NOT NULL($0), IS NULL($2)), 1, 0)], $f1=[CASE(AND(IS NULL($0), IS NOT NULL($2)), 1, 0)], $f2=[CASE(AND(IS NOT NULL($0), IS NOT NULL($2)), 1, 0)]) + HiveJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_item_sk=[$0]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_bill_customer_sk=[$0], cs_item_sk=[$1]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out new file mode 100644 index 0000000000..804885cc83 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) + 
HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out new file mode 100644 index 0000000000..75d0e605d4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - 
cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name 
+order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7], (tok_function substr (tok_table_or_col w_warehouse_name) 1 20)=[$0]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)]) + HiveProject($f0=[substr($10, 1, 20)], $f1=[$12], $f2=[$8], $f3=[CASE(<=(-($1, $0), 30), 1, 0)], $f4=[CASE(AND(>(-($1, $0), 30), <=(-($1, $0), 60)), 1, 0)], $f5=[CASE(AND(>(-($1, $0), 60), <=(-($1, $0), 90)), 1, 0)], $f6=[CASE(AND(>(-($1, $0), 90), <=(-($1, $0), 120)), 1, 0)], $f7=[CASE(>(-($1, $0), 120), 1, 0)]) + HiveJoin(condition=[=($3, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_call_center_sk=[$11], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 
1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(sm_ship_mode_sk=[$0], sm_type=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out new file mode 100644 index 0000000000..13801ffc6c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out @@ -0,0 +1,90 @@ +PREHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = 
d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1]) + HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($24, _UTF-16LE'NM')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], 
agg#1=[count($2)]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out new file mode 100644 index 0000000000..42e3df0a76 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + 
d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + 
order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=[$8], sort6=[$10], sort7=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], fetch=[100]) + HiveProject(cd_gender=[$0], cd_marital_status=[$1], cd_education_status=[$2], cnt1=[$8], cd_purchase_estimate=[$3], cnt2=[$8], cd_credit_rating=[$4], cnt3=[$8], cd_dep_count=[$5], cnt4=[$8], cd_dep_employed_count=[$6], cnt5=[$8], cd_dep_college_count=[$7], cnt6=[$8]) + HiveAggregate(group=[{6, 7, 8, 9, 10, 11, 12, 13}], agg#0=[count()]) + HiveFilter(condition=[OR(IS NOT NULL($15), IS NOT NULL($17))]) + HiveJoin(condition=[=($0, $16)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $14)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IN($7, _UTF-16LE'Walker County', _UTF-16LE'Richland County', _UTF-16LE'Gaines County', _UTF-16LE'Douglas County', 
_UTF-16LE'Dona Ana County')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + 
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out new file mode 100644 index 0000000000..74cef877ea --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and 
t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year 
+ ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_preferred_cust_flag=[$3]) + HiveJoin(condition=[AND(=($2, $0), CASE($5, CASE($6, >(/($1, $4), $7), $8), CASE($6, >(/($1, $4), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$3], c_preferred_cust_flag0=[$1], year_total1=[$7], CAST=[$5], CAST25=[$8], /=[/($2, $4)], >=[>(null, /($2, $4))]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], 
c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out new file mode 100644 index 0000000000..ab4a8377e6 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + 
,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS 
FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out new file mode 100644 index 0000000000..ae90ac9e40 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status 
= 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 
+ )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) + HiveAggregate(group=[{}], agg#0=[sum($7)], agg#1=[count($7)], agg#2=[sum($8)], agg#3=[count($8)], agg#4=[sum($9)], agg#5=[count($9)]) + HiveJoin(condition=[AND(=($6, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$10], ss_addr_sk=[$13], ss_quantity=[$14], ss_ext_sales_price=[$15], 
ss_ext_wholesale_cost=[$16], BETWEEN=[$17], BETWEEN6=[$18], BETWEEN7=[$19]) + HiveJoin(condition=[AND(=($0, $11), OR(AND($1, $2, $20, $8), AND($3, $4, $21, $9), AND($5, $6, $22, $9)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) + HiveFilter(condition=[IN($3, 3, 1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 100, 200), BETWEEN(false, $22, 150, 300), BETWEEN(false, $22, 50, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out new file mode 100644 index 
0000000000..457cdce755 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -0,0 +1,612 @@ +Warning: Shuffle Join MERGEJOIN[1431][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1433][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1456][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1435][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1469][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +PREHOOK: query: explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity 
quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales 
+PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from 
cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], groups=[[{0, 1, 2, 3}, {0, 1, 2}, {0, 1}, {0}, {}]], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(channel=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], 
sales=[$4], number_sales=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store'], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) 
+ HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, 
item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + 
HiveProject(channel=[_UTF-16LE'catalog'], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], 
table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + 
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], 
ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web'], i_brand_id=[$0], 
i_class_id=[$1], i_category_id=[$2], sales=[$3], number_sales=[$4]) + HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) + HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], 
i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(quantity=[$0], list_price=[$1]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)]) + HiveProject($f0=[*(CAST($0):DECIMAL(10, 0), $1)]) + HiveUnion(all=[true]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(quantity=[$1], list_price=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1998, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out new file mode 100644 index 0000000000..ed23e969e1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{2}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sales_price=[$2], ca_zip=[$7]) + 
HiveJoin(condition=[AND(=($1, $4), OR($8, $3, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21], >=[>($21, 500)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9], IN=[IN(substr($9, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', _UTF-16LE'80348', _UTF-16LE'81792')], IN3=[IN($8, _UTF-16LE'CA', _UTF-16LE'WA', _UTF-16LE'GA')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out new file mode 100644 index 0000000000..ddbf80a66d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy 
County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net 
profit=[$2], (tok_functiondi count (tok_table_or_col cs_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[=($8, _UTF-16LE'NY')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($10), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00, 2001-05-31 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + HiveFilter(condition=[IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County')]) + 
HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(cs_warehouse_sk=[$14], cs_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($14)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) + HiveProject(cr_order_number0=[$16], $f1=[true]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out new file mode 100644 index 0000000000..9a35b28853 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = 
cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + 
,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_state=[$2], store_sales_quantitycount=[$3], store_sales_quantityave=[/(CAST($4):DOUBLE, $3)], store_sales_quantitystdev=[POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5)], store_sales_quantitycov=[/(POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5), /(CAST($4):DOUBLE, $3))], as_store_returns_quantitycount=[$7], as_store_returns_quantityave=[/(CAST($8):DOUBLE, $7)], as_store_returns_quantitystdev=[POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5)], store_returns_quantitycov=[/(POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5), /(CAST($8):DOUBLE, $7))], catalog_sales_quantitycount=[$11], catalog_sales_quantityave=[/(CAST($12):DOUBLE, $11)], catalog_sales_quantitystdev=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))], catalog_sales_quantitycov=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[sum($3)], agg#2=[sum($7)], agg#3=[sum($6)], agg#4=[count($4)], agg#5=[sum($4)], agg#6=[sum($9)], agg#7=[sum($8)], agg#8=[count($5)], agg#9=[sum($5)], agg#10=[sum($11)], agg#11=[sum($10)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$7], $f3=[$4], $f4=[$5], $f5=[$6], $f30=[CAST($4):DOUBLE], $f7=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)], $f40=[CAST($5):DOUBLE], $f9=[*(CAST($5):DOUBLE, CAST($5):DOUBLE)], 
$f50=[CAST($6):DOUBLE], $f11=[*(CAST($6):DOUBLE, CAST($6):DOUBLE)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_item_sk=[$0], ss_quantity=[$2], sr_return_quantity=[$3], cs_quantity=[$5], s_state=[$8]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_store_sk=[$2], ss_quantity=[$3], sr_return_quantity=[$5], cs_sold_date_sk=[$6], cs_quantity=[$7]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$3], ss_quantity=[$5], sr_returned_date_sk=[$6], sr_return_quantity=[$10], cs_sold_date_sk=[$11], cs_quantity=[$14]) + HiveJoin(condition=[AND(=($8, $12), =($7, $13))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $8), =($1, $7)), =($4, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS 
NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($15, _UTF-16LE'2000Q1')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out new file mode 100644 index 0000000000..72313e7d8b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out @@ -0,0 +1,113 @@ +Warning: Shuffle Join MERGEJOIN[139][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain cbo +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk 
and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### 
+CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject($f0=[$3], $f1=[$2], $f2=[$1], $f3=[$0], $f4=[/($4, $5)], $f5=[/($6, $7)], $f6=[/($8, $9)], $f7=[/($10, $11)], $f8=[/($12, $13)], $f9=[/($14, $15)], $f10=[/($16, $17)]) + HiveAggregate(group=[{2, 3, 4, 14}], groups=[[{2, 3, 4, 14}, {3, 4, 14}, {4, 14}, {14}, {}]], agg#0=[sum($5)], agg#1=[count($5)], agg#2=[sum($6)], agg#3=[count($6)], agg#4=[sum($7)], agg#5=[count($7)], agg#6=[sum($8)], agg#7=[count($8)], agg#8=[sum($9)], agg#9=[count($9)], agg#10=[sum($10)], agg#11=[count($10)], agg#12=[sum($11)], agg#13=[count($11)]) + HiveJoin(condition=[=($1, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$2], ca_county=[$15], ca_state=[$16], ca_country=[$17], CAST=[$4], CAST6=[$5], CAST7=[$6], CAST8=[$7], CAST9=[$8], CAST10=[$13], CAST11=[$9]) + HiveJoin(condition=[AND(=($1, $10), =($11, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$3], cd_demo_sk0=[$11], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], CAST9=[$10]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 
2)]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($3, _UTF-16LE'College'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(cd_demo_sk=[$0]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], CAST=[CAST($13):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) + HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query19.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query19.q.out new file mode 100644 index 0000000000..5d180700b7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query19.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group 
by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], i_manufact_id=[$2], i_manufact=[$3], ext_price=[$4]) + HiveSortLimit(sort0=[$4], sort1=[$5], sort2=[$6], sort3=[$2], sort4=[$3], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(brand_id=[$0], brand=[$1], i_manufact_id=[$2], i_manufact=[$3], ext_price=[$4], (tok_table_or_col i_brand)=[$1], (tok_table_or_col i_brand_id)=[$0]) + HiveAggregate(group=[{2, 3, 4, 5}], agg#0=[sum($1)]) + HiveJoin(condition=[AND(<>($6, $8), =($0, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$5], ss_ext_sales_price=[$6], 
i_brand_id=[$7], i_brand=[$8], i_manufact_id=[$9], i_manufact=[$10], substr=[$3]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], substr=[substr($9, 1, 5)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_customer_sk=[$2], ss_store_sk=[$3], ss_ext_sales_price=[$4], i_brand_id=[$7], i_brand=[$8], i_manufact_id=[$9], i_manufact=[$10]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 11), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manufact_id=[$13], i_manufact=[$14]) + HiveFilter(condition=[=($20, 7)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], substr=[substr($25, 1, 5)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out new file mode 100644 index 0000000000..c245b9bc4e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: 
explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: 
Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC]) + HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $9), 2)], _o__c2=[round(/($2, $10), 2)], _o__c3=[round(/($3, $11), 2)], _o__c4=[round(/($4, $12), 2)], _o__c5=[round(/($5, $13), 2)], _o__c6=[round(/($6, $14), 2)], _o__c7=[round(/($7, $15), 2)]) + HiveJoin(condition=[=($0, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) + HiveProject($f0=[$3], $f1=[CASE($4, $1, null)], $f2=[CASE($5, $1, null)], $f3=[CASE($6, $1, null)], $f4=[CASE($7, $1, null)], $f5=[CASE($8, $1, null)], $f6=[CASE($9, $1, null)], $f7=[CASE($10, $1, null)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$1]) + HiveUnion(all=[true]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(cs_sold_date_sk=[$0], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7], -=[-($0, 53)]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) + HiveProject($f0=[$3], $f1=[CASE($4, $1, null)], $f2=[CASE($5, $1, null)], $f3=[CASE($6, $1, null)], $f4=[CASE($7, $1, null)], $f5=[CASE($8, $1, null)], $f6=[CASE($9, $1, null)], $f7=[CASE($10, $1, null)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$1]) + HiveUnion(all=[true]) + HiveProject(ws_sold_date_sk=[$0], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(cs_sold_date_sk=[$0], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out new file mode 100644 index 0000000000..29382b389b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: 
+HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out new file mode 100644 index 0000000000..c9628bfd6e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand 
+ else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) 
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveFilter(condition=[CASE(>($2, 0), BETWEEN(false, /(CAST($3):DOUBLE, CAST($2):DOUBLE), 6.66667E-1, 1.5E0), null)]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveProject($f0=[$2], $f1=[$3], $f2=[CASE($5, $1, 0)], $f3=[CASE($6, $1, 0)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_quantity_on_hand=[$3], w_warehouse_name=[$7], i_item_id=[$5]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99, 1.49)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00, 1998-05-08 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out new file mode 100644 index 0000000000..77f91f696c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and 
inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$4], sort1=[$0], sort2=[$1], sort3=[$2], sort4=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject($f0=[$3], $f1=[$0], $f2=[$1], $f3=[$2], $f4=[/(CAST($4):DOUBLE, $5)]) + HiveAggregate(group=[{1, 2, 3, 4}], groups=[[{1, 2, 3, 4}, {1, 2, 4}, {1, 4}, {4}, {}]], agg#0=[sum($7)], agg#1=[count($7)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out new file mode 100644 index 0000000000..9629a7112e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -0,0 +1,245 @@ +Warning: Shuffle Join MERGEJOIN[445][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[448][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[449][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 25' is a cross product +PREHOOK: query: explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + 
and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from 
best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($0)]) + HiveProject(sales=[$0]) + HiveUnion(all=[true]) + HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) + HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[>($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], 
ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(*=[*(0.95, $0)]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_sk=[$0]) + HiveFilter(condition=[>($1, 4)]) + HiveAggregate(group=[{4}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(item_sk=[$0]) + HiveFilter(condition=[>($1, 4)]) + HiveAggregate(group=[{4}], agg#0=[count()]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[>($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + 
HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(*=[*(0.95, $0)]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out new file mode 100644 index 0000000000..d56dd9600a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -0,0 +1,172 @@ +Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + 
and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_last_name=[$2], c_first_name=[$1], s_store_name=[$0], $f3=[$3]) + HiveAggregate(group=[{1, 7, 8}], agg#0=[sum($9)]) + HiveProject(ca_state=[$0], s_store_name=[$1], s_state=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], c_first_name=[$7], c_last_name=[$8], $f9=[$9]) + HiveAggregate(group=[{0, 4, 5, 7, 8, 9, 10, 11, 12}], agg#0=[sum($3)]) + HiveJoin(condition=[AND(=($13, $2), =($6, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_sales_price=[$6], s_store_name=[$7], s_state=[$8], s_zip=[$9], i_current_price=[$11], i_size=[$12], i_units=[$13], i_manager_id=[$14], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], 
c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($14)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_sales_price=[$6], s_store_name=[$8], s_state=[$9], s_zip=[$10]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(_o__c0=[*(0.05, /($0, $1))]) + HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) + HiveProject(s_store_name=[$0], s_state=[$1], i_current_price=[$2], i_size=[$3], i_color=[$4], i_units=[$5], i_manager_id=[$6], c_first_name=[$7], c_last_name=[$8], ca_state=[$9], $f10=[$10]) + HiveAggregate(group=[{1, 2, 4, 5, 6, 7, 8, 9, 10, 12}], agg#0=[sum($0)]) + HiveJoin(condition=[AND(=($11, $14), =($3, $13))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sales_price=[$6], s_store_name=[$7], s_state=[$8], s_zip=[$9], i_current_price=[$11], i_size=[$12], i_color=[$13], i_units=[$14], 
i_manager_id=[$15], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($14)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_sales_price=[$6], s_store_name=[$8], s_state=[$9], s_zip=[$10]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out new file mode 100644 index 0000000000..d1995b2a1f --- 
/dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and 
sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 7, 8}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_item_sk=[$3], ss_net_profit=[$5], sr_net_loss=[$6], cs_net_profit=[$8], s_store_id=[$1], s_store_name=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($4, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_store_sk=[$2], ss_net_profit=[$3], sr_net_loss=[$5], cs_sold_date_sk=[$6], cs_net_profit=[$7]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$3], ss_net_profit=[$5], sr_returned_date_sk=[$6], sr_net_loss=[$10], cs_sold_date_sk=[$11], cs_net_profit=[$14]) + HiveJoin(condition=[AND(=($8, $12), =($7, $13))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $8), =($1, $7)), =($4, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 4), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out new file mode 100644 index 0000000000..fa2e8ad60f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out @@ -0,0 
+1,76 @@ +PREHOOK: query: explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[/(CAST($1):DOUBLE, $2)], $f2=[/($3, $4)], $f3=[/($5, $6)], $f4=[/($7, $8)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)], agg#1=[count($4)], agg#2=[sum($5)], agg#3=[count($5)], agg#4=[sum($7)], agg#5=[count($7)], agg#6=[sum($6)], 
agg#7=[count($6)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$2], cs_promo_sk=[$3], cs_quantity=[$4], cs_list_price=[$5], cs_sales_price=[$6], cs_coupon_amt=[$7]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'F'), =($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out new file mode 100644 index 0000000000..fdf05eb2dc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + 
avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], s_state=[$1], g_state=[grouping($10, 0)], agg1=[/(CAST($2):DOUBLE, $3)], agg2=[/($4, $5)], agg3=[/($6, $7)], agg4=[/($8, $9)]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[count($2)], agg#2=[sum($3)], agg#3=[count($3)], agg#4=[sum($4)], 
agg#5=[count($4)], agg#6=[sum($5)], agg#7=[count($5)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$1], $f1=[$9], $f2=[$4], $f3=[$5], $f4=[$7], $f5=[$6]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_store_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out new file mode 100644 index 0000000000..643e5b6259 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out 
@@ -0,0 +1,146 @@ +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + 
from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where 
ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f00=[$15], $f10=[$16], $f20=[$17], $f01=[$12], $f11=[$13], $f21=[$14], $f02=[$9], $f12=[$10], $f22=[$11], $f03=[$6], $f13=[$7], $f23=[$8], $f04=[$3], $f14=[$4], $f24=[$5]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 0, 5), OR(BETWEEN(false, $12, 11, 21), BETWEEN(false, $19, 460, 1460), BETWEEN(false, $11, 14, 34)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 26, 30), OR(BETWEEN(false, $12, 28, 38), BETWEEN(false, $19, 2513, 3513), BETWEEN(false, $11, 42, 62)))]) + HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 21, 25), OR(BETWEEN(false, $12, 135, 145), BETWEEN(false, $19, 14180, 15180), BETWEEN(false, $11, 38, 58)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 16, 20), OR(BETWEEN(false, $12, 142, 152), BETWEEN(false, $19, 3054, 4054), BETWEEN(false, $11, 80, 100)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 11, 15), OR(BETWEEN(false, $12, 66, 76), BETWEEN(false, $19, 920, 1920), BETWEEN(false, $11, 4, 24)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($12)], agg#1=[count($12)], agg#2=[count(DISTINCT $12)]) + HiveFilter(condition=[AND(BETWEEN(false, $10, 6, 10), OR(BETWEEN(false, $12, 91, 101), BETWEEN(false, $19, 1430, 2430), BETWEEN(false, $11, 32, 52)))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out new file mode 100644 index 0000000000..ac3a0e026c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out @@ -0,0 +1,143 @@ +PREHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + 
,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + 
group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 7, 8}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_item_sk=[$3], ss_quantity=[$5], sr_return_quantity=[$6], cs_quantity=[$8], s_store_id=[$1], s_store_name=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($4, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_store_sk=[$2], ss_quantity=[$3], sr_return_quantity=[$5], cs_sold_date_sk=[$6], cs_quantity=[$7]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$3], ss_quantity=[$5], sr_returned_date_sk=[$6], sr_return_quantity=[$10], cs_sold_date_sk=[$11], cs_quantity=[$14]) + HiveJoin(condition=[AND(=($8, $12), =($7, $13))], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $8), =($1, $7)), =($4, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 4), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 7), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query3.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query3.q.out new file mode 100644 index 0000000000..8f9528cea1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query3.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = 
item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$1], dir0=[ASC], dir1=[DESC-nulls-last], dir2=[ASC], fetch=[100]) + HiveProject(d_year=[$2], i_brand_id=[$0], i_brand=[$1], $f3=[$3]) + HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[=($13, 436)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[=($8, 12)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out new file mode 100644 index 0000000000..bd68baa23b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: explain cbo +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], sort11=[$11], sort12=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], dir10=[ASC], dir11=[ASC], dir12=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1], c_salutation=[$3], c_first_name=[$4], c_last_name=[$5], c_preferred_cust_flag=[$6], c_birth_day=[$7], c_birth_month=[$8], c_birth_year=[$9], c_birth_country=[$10], c_login=[$11], c_email_address=[$12], c_last_review_date=[$13], ctr_total_return=[$17]) + HiveJoin(condition=[=($15, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($14, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_current_addr_sk=[$4], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_day=[$11], c_birth_month=[$12], c_birth_year=[$13], 
c_birth_country=[$14], c_login=[$15], c_email_address=[$16], c_last_review_date=[$17]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($8, _UTF-16LE'IL')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_state=[$0]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ca_state=[$0], wr_returning_customer_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + 
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out new file mode 100644 index 0000000000..b0939360c5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain cbo +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 
2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end 
+ > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(ca_county=[$0], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[$3], store_q1_q2_increase=[$4], web_q2_q3_increase=[/($10, $2)], store_q2_q3_increase=[$5]) + HiveJoin(condition=[AND(=($1, $9), CASE($6, CASE($7, >(/($10, $2), $5), $8), CASE($7, >(/($10, $2), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f02=[$0], $f33=[$11], /=[/($11, $8)], /11=[$1], /12=[$2], >=[$3], >14=[$12], >15=[$4]) + HiveJoin(condition=[AND(=($0, $10), CASE($5, CASE($9, >(/($11, $8), $1), $6), CASE($9, >(/($11, $8), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], /=[/($1, $4)], /7=[/($7, $1)], >=[$2], >9=[>(null, /($7, $1))], >10=[$5], >11=[>(null, /($1, $4))]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_county=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IS NOT NULL($7)]) + HiveTableScan(table=[[default, 
customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query32.q.out new file mode 100644 index 0000000000..c7fb1dd7a8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query32.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + 
cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[AND(>($2, $5), =($6, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_item_sk=[$0], CAST3=[$1], i_item_sk=[$2]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$0], CAST3=[CAST(*(1.3, /($1, $2))):DECIMAL(14, 7)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out new file mode 100644 index 0000000000..e8824ddf54 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out @@ -0,0 +1,238 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: 
default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + 
HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, 
item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books'), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + 
HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out new file mode 100644 index 0000000000..97acb6e46c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out @@ -0,0 +1,99 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by 
ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], 
sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[DESC-nulls-last]) + HiveProject(c_last_name=[$3], c_first_name=[$2], c_salutation=[$1], c_preferred_cust_flag=[$4], ss_ticket_number=[$5], cnt=[$7]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveFilter(condition=[BETWEEN(false, $2, 15, 20)]) + HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{0, 2}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_ticket_number=[$4]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County', _UTF-16LE'Fairfield County', _UTF-16LE'Jackson County', _UTF-16LE'Barrow County', _UTF-16LE'Pennington County')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($2, 
_UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), null))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out new file mode 100644 index 0000000000..e79c6b7e51 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out @@ -0,0 +1,173 @@ +PREHOOK: query: explain cbo +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: 
default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o__c4=[$4], 
_o__c5=[$5], _o__c6=[$6], cd_dep_employed_count=[$7], cnt2=[$8], _o__c9=[$9], _o__c10=[$10], _o__c11=[$11], cd_dep_college_count=[$12], cnt3=[$13], _o__c14=[$14], _o__c15=[$15], _o__c16=[$16]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$17], sort4=[$7], sort5=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], fetch=[100]) + HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$6], _o__c4=[/(CAST($7):DOUBLE, $8)], _o__c5=[$9], _o__c6=[$7], cd_dep_employed_count=[$4], cnt2=[$6], _o__c9=[/(CAST($10):DOUBLE, $11)], _o__c10=[$12], _o__c11=[$10], cd_dep_college_count=[$5], cnt3=[$6], _o__c14=[/(CAST($13):DOUBLE, $14)], _o__c15=[$15], _o__c16=[$13], (tok_table_or_col cd_dep_count)=[$3]) + HiveAggregate(group=[{4, 6, 7, 8, 9, 10}], agg#0=[count()], agg#1=[sum($8)], agg#2=[count($8)], agg#3=[max($8)], agg#4=[sum($9)], agg#5=[count($9)], agg#6=[max($9)], agg#7=[sum($10)], agg#8=[count($10)], agg#9=[max($10)]) + HiveFilter(condition=[OR(IS NOT NULL($12), IS NOT NULL($14))]) + HiveJoin(condition=[=($0, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $11)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $11)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) + HiveTableScan(table=[[default, customer_demographics]], 
table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), <($10, 4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out new file mode 100644 index 0000000000..5b220fc8bd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain cbo +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + 
,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(gross_margin=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + 
HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(gross_margin=[/($2, $3)], i_category=[$0], i_class=[$1], lochierarchy=[+(grouping($4, 1), grouping($4, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($4, 1), grouping($4, 0)), CASE(=(grouping($4, 0), 0), $0, null) ORDER BY /($2, $3) NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col i_category))=[CASE(=(+(grouping($4, 1), grouping($4, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], GROUPING__ID=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$4], $f1=[$3], $f2=[$2], $f3=[$1]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2], ss_ext_sales_price=[$3], ss_net_profit=[$4], i_class=[$7], i_category=[$8]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1999)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC', _UTF-16LE'AL', _UTF-16LE'GA')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff 
--git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query37.q.out new file mode 100644 index 0000000000..5487050af5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query37.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) + HiveAggregate(group=[{2, 3, 4}]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$3], i_item_id=[$4], i_item_desc=[$5], i_current_price=[$6]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) + HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00, 2001-08-01 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) + HiveFilter(condition=[AND(IN($13, 678, 964, 918, 849), BETWEEN(false, $5, 22, 52))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out new file mode 100644 index 0000000000..01e87d2827 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out @@ -0,0 +1,108 @@ +PREHOOK: query: explain cbo +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, 
d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(c_last_name=[$1], 
c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + 
HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out new file mode 100644 index 0000000000..272bedcbce --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query39.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: explain cbo +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by 
inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[$2], cov=[$3], w_warehouse_sk1=[$4], i_item_sk1=[$5], d_moy1=[CAST(5):INTEGER], mean1=[$6], cov1=[$7]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$6], sort5=[$7], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC]) + HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[$6], cov=[$7], w_warehouse_sk0=[$0], i_item_sk0=[$1], mean0=[$2], cov0=[$3]) + 
HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[/(CAST($5):DOUBLE, $4)], cov=[CASE(=(/(CAST($5):DOUBLE, $4), 0), null, /(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)))]) + HiveFilter(condition=[CASE(=(/(CAST($5):DOUBLE, $4), 0), false, >(/(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)), 1))]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + HiveProject($f0=[$6], $f1=[$5], $f2=[$3], $f4=[$2], $f40=[CAST($2):DOUBLE], $f6=[*(CAST($2):DOUBLE, CAST($2):DOUBLE)]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], inv_item_sk=[$1]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 5))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], mean=[/(CAST($5):DOUBLE, $4)], cov=[CASE(=(/(CAST($5):DOUBLE, $4), 0), null, /(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)))]) + HiveFilter(condition=[CASE(=(/(CAST($5):DOUBLE, $4), 0), false, >(/(POWER(/(-($2, /(*($3, $3), $4)), CASE(=($4, 1), null, -($4, 1))), 0.5), /(CAST($5):DOUBLE, $4)), 1))]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) + HiveProject($f0=[$6], $f1=[$5], $f2=[$3], $f4=[$2], $f40=[CAST($2):DOUBLE], $f6=[*(CAST($2):DOUBLE, CAST($2):DOUBLE)]) + 
HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], inv_item_sk=[$1]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + +PREHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + 
from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out new file mode 100644 index 0000000000..d8582bda09 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out @@ -0,0 +1,314 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + 
,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and 
t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + 
select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and 
t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$3]) + HiveJoin(condition=[AND(=($2, $0), CASE($5, CASE($6, >($7, /($1, $4)), >(null, /($1, $4))), $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$2], customer_preferred_cust_flag0=[$3], year_total3=[$10], CAST=[$11], CAST41=[$5], /=[/($1, $4)], CASE=[CASE($5, >(/($1, $4), null), null)]) + HiveJoin(condition=[AND(=($2, $0), CASE($6, CASE($5, >(/($1, $4), $7), $8), CASE($5, >(/($1, $4), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$3], customer_preferred_cust_flag0=[$1], year_total1=[$7], CAST=[$8], CAST25=[$5], /=[/($2, $4)], >=[>(null, /($2, $4))]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject($f0=[$0], $f3=[$3], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + 
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out new file mode 100644 index 0000000000..5453c7cf8e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out @@ -0,0 +1,90 @@ +PREHOOK: query: explain cbo +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk 
= cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveProject($f0=[$2], $f1=[$8], $f2=[CASE($5, $3, 0)], $f3=[CASE($6, $3, 0)]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$2], w_state=[$9], -=[-($4, CASE(IS NOT NULL($7), $7, 0))]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(w_warehouse_sk=[$0], w_state=[$10]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(d_date_sk=[$0], 
<=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00, 1998-05-08 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99, 1.49)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out new file mode 100644 index 0000000000..8f2f79f346 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales 
+POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(d_year=[CAST(1998):INTEGER], i_category_id=[$0], i_category=[$1], _o__c3=[$2]) + HiveSortLimit(sort0=[$3], sort1=[$0], sort2=[$1], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC]) + HiveProject(i_category_id=[$0], i_category=[$1], _o__c3=[$2], (tok_function sum (tok_table_or_col ss_ext_sales_price))=[$2]) + HiveAggregate(group=[{5, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 1998))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + HiveProject(i_item_sk=[$0], i_category_id=[$11], i_category=[$12]) + HiveFilter(condition=[=($20, 1)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out new file mode 100644 index 0000000000..57019d0657 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') 
then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], fetch=[100]) + 
HiveProject(s_store_name=[$1], s_store_id=[$0], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{9, 10}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) + HiveProject(ss_store_sk=[$1], CASE=[CASE($4, $2, null)], CASE2=[CASE($5, $2, null)], CASE3=[CASE($6, $2, null)], CASE4=[CASE($7, $2, null)], CASE5=[CASE($8, $2, null)], CASE6=[CASE($9, $2, null)], CASE7=[CASE($10, $2, null)], s_store_sk=[$11], s_store_id=[$12], s_store_name=[$13]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], ==[=($14, _UTF-16LE'Sunday')], =2=[=($14, _UTF-16LE'Monday')], =3=[=($14, _UTF-16LE'Tuesday')], =4=[=($14, _UTF-16LE'Wednesday')], =5=[=($14, _UTF-16LE'Thursday')], =6=[=($14, _UTF-16LE'Friday')], =7=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveFilter(condition=[=($27, -6)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query44.q.out new file mode 100644 index 0000000000..8cc89f6df2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query44.q.out @@ -0,0 +1,113 @@ +Warning: Shuffle Join MERGEJOIN[101][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain cbo +select asceding.rnk, i1.i_product_name 
best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from 
store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(rnk=[$3], best_performing=[$1], worst_performing=[$5]) + HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveTableScan(table=[[default, item]], table:alias=[i1]) + HiveProject(item_sk=[$0], rank_window_0=[$1]) + HiveFilter(condition=[<($1, 11)]) + HiveProject(item_sk=[$0], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{2}], agg#0=[sum($22)], agg#1=[count($22)]) + HiveFilter(condition=[=($7, 410)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[ss1]) + HiveProject(*=[*(0.9, /($1, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveProject($f0=[true], $f1=[$22]) + HiveFilter(condition=[AND(=($7, 410), IS NULL($5))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$1], item_sk=[$2], rank_window_0=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveTableScan(table=[[default, item]], table:alias=[i2]) + HiveProject(item_sk=[$0], 
rank_window_0=[$1]) + HiveFilter(condition=[<($1, 11)]) + HiveProject(item_sk=[$0], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY $1 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[/($1, $2)]) + HiveAggregate(group=[{2}], agg#0=[sum($22)], agg#1=[count($22)]) + HiveFilter(condition=[=($7, 410)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[ss1]) + HiveProject(*=[*(0.9, /($1, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveProject($f0=[true], $f1=[$22]) + HiveFilter(condition=[AND(=($7, 410), IS NULL($5))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query45.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query45.q.out new file mode 100644 index 0000000000..85f8116c2f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query45.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo 
+select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ca_zip=[$1], ca_county=[$0], $f2=[$2]) + HiveAggregate(group=[{7, 8}], agg#0=[sum($3)]) + HiveFilter(condition=[OR(IN(substr($8, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', _UTF-16LE'80348', _UTF-16LE'81792'), IS NOT NULL($15))]) + HiveProject(ws_sold_date_sk=[$9], ws_item_sk=[$10], ws_bill_customer_sk=[$11], ws_sales_price=[$12], c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_zip=[$4], d_date_sk=[$13], d_year=[$14], d_qoy=[$15], i_item_sk=[$5], i_item_id=[$6], i_item_id0=[$7], i1160=[$8]) + HiveJoin(condition=[=($11, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_zip=[$9]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2], i1160=[$3], ws_sold_date_sk=[$4], ws_item_sk=[$5], ws_bill_customer_sk=[$6], ws_sales_price=[$7], d_date_sk=[$8], d_year=[$9], d_qoy=[$10]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0], i1160=[true]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($0, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_qoy=[$6]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_qoy=[CAST(2):INTEGER]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out new file mode 100644 index 0000000000..7c5a976be4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out @@ -0,0 +1,114 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + 
,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and 
store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(c_last_name=[$3], c_first_name=[$2], ca_city=[$5], bought_city=[$8], ss_ticket_number=[$6], amt=[$9], profit=[$10]) + HiveJoin(condition=[AND(<>($5, $8), =($7, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveTableScan(table=[[default, customer_address]], table:alias=[current_addr]) + HiveProject(ss_ticket_number=[$3], ss_customer_sk=[$1], bought_city=[$0], amt=[$4], profit=[$5]) + HiveAggregate(group=[{1, 2, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_ticket_number=[$5], ss_coupon_amt=[$6], ss_net_profit=[$7]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($7, 6, 0), IN($6, 1998, 1999, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[OR(=($3, 2), =($4, 1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out new file mode 100644 index 0000000000..ad48929dfb --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + 
order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) 
+ group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], sum_sales=[$4], psum=[$5], nsum=[$6]) + HiveSortLimit(sort0=[$7], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($18, $19)]) + HiveJoin(condition=[AND(AND(AND(AND(=($12, $0), =($13, $1)), =($14, $2)), =($15, $3)), =($20, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], -=[-($5, 1)]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col 
i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 4, 5, 7, 8}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$1], i_category=[$2], ss_store_sk=[$5], ss_sales_price=[$6], d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[AND(AND(AND(AND(=($6, $0), =($7, $1)), =($8, $2)), =($9, $3)), =($14, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col 
i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[+($5, 1)]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 4, 5, 7, 8}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$1], i_category=[$2], ss_store_sk=[$5], ss_sales_price=[$6], d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], 
table:alias=[store]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0), CASE(>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1), null), IS NOT NULL($8))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $1, $0, $4, $5, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 4, 5, 7, 8}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$1], i_category=[$2], ss_store_sk=[$5], ss_sales_price=[$6], d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT 
NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out new file mode 100644 index 0000000000..7dfa2cb427 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: explain cbo +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + 
ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$5], ss_quantity=[$8]) + HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $9), AND($2, $10), AND($3, $11)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0, 2000)], BETWEEN6=[BETWEEN(false, $22, 150, 3000)], BETWEEN7=[BETWEEN(false, $22, 50, 25000)]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 0, 2000), BETWEEN(false, $22, 150, 3000), BETWEEN(false, $22, 50, 25000)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out new file mode 100644 index 0000000000..4fddcdfd66 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out @@ 
-0,0 +1,330 @@ +PREHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 
+ and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as 
dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select 
sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveAggregate(group=[{0, 1, 2, 3, 4}]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveAggregate(group=[{0, 1, 2, 3, 4}]) + HiveProject(channel=[$0], item=[$1], return_ratio=[$2], return_rank=[$3], currency_rank=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'web'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], 
rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(ws_item_sk=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveProject(ws_sold_date_sk=[$4], ws_item_sk=[$5], CASE=[CASE(IS NOT NULL($2), $2, 0)], CASE3=[CASE(IS NOT NULL($7), $7, 0)], CASE4=[CASE(IS NOT NULL($3), $3, 0)], CASE5=[CASE(IS NOT NULL($8), $8, 0)], d_date_sk=[$10]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[>($15, 10000)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_net_paid=[$29], ws_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1), >($29, 0), >($18, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 
2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(cs_item_sk=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveProject(cs_sold_date_sk=[$4], cs_item_sk=[$5], CASE=[CASE(IS NOT NULL($2), $2, 0)], CASE3=[CASE(IS NOT NULL($7), $7, 0)], CASE4=[CASE(IS NOT NULL($3), $3, 0)], CASE5=[CASE(IS NOT NULL($8), $8, 0)], d_date_sk=[$10]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[>($18, 10000)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_net_paid=[$29], cs_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1), >($29, 0), >($18, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'store'], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) + HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) + HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS 
BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(ss_item_sk=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveProject(ss_sold_date_sk=[$4], ss_item_sk=[$5], CASE=[CASE(IS NOT NULL($2), $2, 0)], CASE3=[CASE(IS NOT NULL($7), $7, 0)], CASE4=[CASE(IS NOT NULL($3), $3, 0)], CASE5=[CASE(IS NOT NULL($8), $8, 0)], d_date_sk=[$10]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[>($11, 10000)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_net_paid=[$20], ss_net_profit=[$22]) + HiveFilter(condition=[AND(>($22, 1), >($20, 0), >($10, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out new file mode 100644 index 0000000000..54f3dd6a10 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out @@ -0,0 +1,339 @@ +PREHOOK: query: explain cbo +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as 
net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left 
outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + 
sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and 
(cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[||(_UTF-16LE'store', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(store_sk=[$0], date_sk=[$1], 
sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(store_sk=[$7], date_sk=[$0], sales_price=[$15], profit=[$22], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(store_sk=[$7], date_sk=[$0], sales_price=[CAST(0):DECIMAL(7, 2)], profit=[CAST(0):DECIMAL(7, 2)], return_amt=[$11], net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)], agg#3=[sum($7)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(page_sk=[$12], date_sk=[$0], 
sales_price=[CAST(0):DECIMAL(7, 2)], profit=[CAST(0):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(wsr_web_site_sk=[$13], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0):DECIMAL(7, 2)], net_loss=[CAST(0):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ws_web_site_sk=[$1], wr_returned_date_sk=[$3], $f2=[CAST(0):DECIMAL(7, 2)], $f3=[CAST(0):DECIMAL(7, 2)], wr_return_amt=[$6], wr_net_loss=[$7]) + HiveJoin(condition=[AND(=($4, $0), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$3], ws_web_site_sk=[$13], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + 
HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-08-18 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out new file mode 100644 index 0000000000..88262b4d2b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: explain cbo +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id 
+ ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: 
hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], fetch=[100]) + HiveProject(s_store_name=[$0], s_company_id=[$1], s_street_number=[$2], s_street_name=[$3], s_street_type=[$4], s_suite_number=[$5], s_city=[$6], s_county=[$7], s_state=[$8], s_zip=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}], agg#0=[sum($11)], agg#1=[sum($12)], agg#2=[sum($13)], agg#3=[sum($14)], agg#4=[sum($15)]) + HiveJoin(condition=[=($0, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$5], s_store_name=[$10], s_company_id=[$11], s_street_number=[$12], s_street_name=[$13], s_street_type=[$14], s_suite_number=[$15], s_city=[$16], s_county=[$17], s_state=[$18], s_zip=[$19], CASE=[CASE(<=(-($5, $0), 30), 1, 0)], CASE13=[CASE(AND(>(-($5, $0), 30), <=(-($5, $0), 60)), 1, 0)], CASE14=[CASE(AND(>(-($5, $0), 60), <=(-($5, $0), 90)), 1, 0)], CASE15=[CASE(AND(>(-($5, $0), 90), <=(-($5, $0), 120)), 1, 0)], CASE16=[CASE(>(-($5, $0), 120), 1, 0)]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($4, $8), =($1, $6)), =($2, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], 
table:alias=[store_returns]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_id=[$16], s_street_number=[$18], s_street_name=[$19], s_street_type=[$20], s_suite_number=[$21], s_city=[$22], s_county=[$23], s_state=[$24], s_zip=[$25]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out new file mode 100644 index 0000000000..3d81b6bcb5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query51.q.out @@ -0,0 +1,125 @@ +PREHOOK: query: explain cbo +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date 
else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_sk=[$0], d_date=[$1], web_sales=[$2], store_sales=[$3], max_window_0=[$4], max_window_1=[$5]) + HiveFilter(condition=[>($4, $5)]) + HiveProject(item_sk=[CASE(IS NOT NULL($3), $3, $0)], d_date=[CASE(IS NOT NULL($4), $4, $1)], web_sales=[$5], store_sales=[$2], max_window_0=[max($5) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], max_window_1=[max($2) OVER (PARTITION BY CASE(IS NOT NULL($3), $3, $0) ORDER BY CASE(IS NOT NULL($4), $4, $1) NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)]) + HiveJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) + HiveProject((tok_table_or_col ss_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject((tok_table_or_col ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[$2]) + HiveProject((tok_table_or_col 
ws_item_sk)=[$0], (tok_table_or_col d_date)=[$1], sum_window_0=[sum($2) OVER (PARTITION BY $0 ORDER BY $1 NULLS LAST ROWS BETWEEN CURRENT ROW AND 2147483647 PRECEDING)], window_col_0=[$2]) + HiveProject(ws_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_sales_price=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query52.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query52.q.out new file mode 100644 index 0000000000..ab2db4ba84 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query52.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and 
dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(d_year=[CAST(1998):INTEGER], brand_id=[$0], brand=[$1], ext_price=[$2]) + HiveSortLimit(sort0=[$2], sort1=[$0], dir0=[DESC-nulls-last], dir1=[ASC], fetch=[100]) + HiveProject(i_brand_id=[$0], i_brand=[$1], $f2=[$2]) + HiveAggregate(group=[{5, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 1998))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[=($20, 1)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out new file mode 100644 index 0000000000..bac3f77538 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out @@ -0,0 +1,83 @@ +PREHOOK: query: explain cbo +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in 
(1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order 
by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1), null)]) + HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_manufact_id=[$0], d_qoy=[$1], $f2=[$2]) + HiveAggregate(group=[{4, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, 
_UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_qoy=[$10]) + HiveFilter(condition=[IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out new file mode 100644 index 0000000000..1e1adac85c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -0,0 +1,213 @@ +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +PREHOOK: query: explain cbo +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk 
item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and 
d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(segment=[$0], num_customers=[$1], segment_base=[*($0, 50)]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(segment=[CAST(/($1, CAST(50):DECIMAL(10, 0))):INTEGER]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveFilter(condition=[BETWEEN(false, $2, $3, $4)]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], ss_ext_sales_price=[$4], d_month_seq=[$11], _o__c0=[$13]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$10], $f1=[$11], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$5], ca_county=[$6], ca_state=[$7], s_county=[$8], 
s_state=[$9], d_date_sk=[$3], d_month_seq=[$4], cnt=[$12], $f00=[$13]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(customer_sk=[$1]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, 
$3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 3), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, 
date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query55.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query55.q.out new file mode 100644 index 0000000000..6af451cf4b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query55.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], ext_price=[$2]) + HiveSortLimit(sort0=[$2], sort1=[$3], dir0=[DESC-nulls-last], dir1=[ASC], fetch=[100]) + HiveProject(brand_id=[$0], brand=[$1], ext_price=[$2], (tok_table_or_col i_brand_id)=[$0]) + HiveAggregate(group=[{5, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[=($20, 36)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out new file mode 100644 index 0000000000..a13e599b34 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: explain cbo +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id 
,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: 
Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($6, $0)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], 
ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out new file mode 100644 index 0000000000..e06078f0a9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out @@ -0,0 +1,174 @@ +PREHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category 
and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_sales=[$4], sum_sales=[$5], psum=[$6], nsum=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($15, $16)]) + HiveJoin(condition=[AND(AND(AND(=($10, $0), =($11, $1)), =($12, $2)), =($17, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], -=[-($4, 1)]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 4, 5, 7}], agg#0=[sum($3)]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$1], i_category=[$2], cs_call_center_sk=[$4], cs_sales_price=[$6], 
d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($6)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveJoin(condition=[AND(AND(AND(=($5, $0), =($6, $1)), =($7, $2)), =($12, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[+($4, 1)]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 4, 5, 7}], agg#0=[sum($3)]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(i_brand=[$1], i_category=[$2], cs_call_center_sk=[$4], cs_sales_price=[$6], d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($6)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0), CASE(>($6, 0), >(/(ABS(-($5, $6)), $6), 0.1), null), IS NOT NULL($7))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $1, $0, $4, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER 
(PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 4, 5, 7}], agg#0=[sum($3)]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$1], i_category=[$2], cs_call_center_sk=[$4], cs_sales_price=[$6], d_year=[$8], d_moy=[$9]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($6)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out new file mode 100644 index 0000000000..df67f6fa48 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out @@ -0,0 +1,237 @@ +Warning: Shuffle Join MERGEJOIN[401][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +PREHOOK: query: explain cbo +with ss_items as + 
(select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: 
Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * 
cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))]) + HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(item_id=[$0], cs_item_rev=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], ss_item_rev=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + 
HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], ws_item_rev=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, 
date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out new file mode 100644 index 0000000000..ad2568dd5f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain cbo +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + 
,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name 
s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(s_store_name1=[$0], s_store_id1=[$2], d_week_seq1=[$1], _o__c3=[/($3, $11)], _o__c4=[/($4, $12)], _o__c5=[$9], _o__c6=[/($5, $13)], _o__c7=[/($6, $14)], _o__c8=[/($7, $15)], _o__c9=[/($8, $16)]) + HiveJoin(condition=[AND(=($2, $10), =($1, $17))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_name1=[$2], d_week_seq1=[$3], s_store_id1=[$1], sun_sales1=[$5], mon_sales1=[$6], wed_sales1=[$8], thu_sales1=[$9], fri_sales1=[$10], sat_sales1=[$11], /=[/($7, $7)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null)], $f3=[CASE($6, $2, null)], $f4=[CASE($7, $2, null)], $f5=[CASE($8, $2, null)], $f6=[CASE($9, $2, null)], $f7=[CASE($10, $2, null)], $f8=[CASE($11, $2, null)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9], -=[-($2, 52)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], 
agg#5=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null)], $f3=[CASE($6, $2, null)], $f4=[CASE($7, $2, null)], $f5=[CASE($8, $2, null)], $f6=[CASE($9, $2, null)], $f7=[CASE($10, $2, null)], $f8=[CASE($11, $2, null)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out new file mode 100644 index 0000000000..776861ae69 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out @@ -0,0 +1,109 @@ +Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain cbo +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select 
avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(ca_state=[$0], $f1=[$1]) + HiveFilter(condition=[>=($1, 10)]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveJoin(condition=[AND(=($3, $2), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_state=[$9], i_current_price=[$4], i_category=[$5]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT 
NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[s]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($12)]) + HiveTableScan(table=[[default, item]], table:alias=[i]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_state=[$3]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveTableScan(table=[[default, customer_address]], table:alias=[a]) + HiveProject(d_date_sk=[$0], d_month_seq=[$1], d_month_seq0=[$2], cnt=[$3]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_category=[$0], *=[*(1.2, CAST(/($1, $2)):DECIMAL(16, 6))]) + HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) + HiveFilter(condition=[IS NOT NULL($12)]) + HiveTableScan(table=[[default, item]], table:alias=[j]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out new file mode 100644 index 0000000000..ea098f7567 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain 
cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(i_item_id=[$0], $f1=[$1]) + 
HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], 
cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($7)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + 
HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out new file mode 100644 index 0000000000..e9d8ccf8bc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out @@ -0,0 +1,166 @@ +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 9' is a cross product +PREHOOK: query: explain cbo +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion 
+PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(promotions=[$0], total=[$2], _o__c2=[*(/($1, $3), CAST(100):DECIMAL(10, 0))]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(promotions=[$0], CAST=[CAST($0):DECIMAL(15, 4)]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$3], ss_ext_sales_price=[$5], c_current_addr_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_ext_sales_price=[$5]) + HiveJoin(condition=[=($4, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($27, -7)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($8, _UTF-16LE'Y'), =($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($12, _UTF-16LE'Electronics')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(total=[$0], CAST=[CAST($0):DECIMAL(15, 4)]) + 
HiveAggregate(group=[{}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$3], ss_ext_sales_price=[$5]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_ext_sales_price=[$4]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($27, -7)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($12, _UTF-16LE'Electronics')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out new file mode 100644 index 0000000000..e22d812f78 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out @@ -0,0 +1,85 @@ 
+PREHOOK: query: explain cbo +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in 
('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1), null)]) + HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_manager_id=[$0], d_moy=[$1], $f2=[$2]) + HiveAggregate(group=[{4, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_manager_id=[$20]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', 
_UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_moy=[$8]) + HiveFilter(condition=[IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out new file mode 100644 index 0000000000..ef80bf61d3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out @@ -0,0 +1,402 @@ +Warning: Shuffle Join MERGEJOIN[932][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[933][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[947][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[948][tables = [$hdt$_1, $hdt$_2, 
$hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 25' is a cross product +PREHOOK: query: explain cbo +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = 
ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having 
sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + 
,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number=[$3], b_streen_name=[$4], b_city=[$5], b_zip=[$6], c_street_number=[$7], c_street_name=[$8], c_city=[$9], c_zip=[$10], syear=[CAST(2000):INTEGER], cnt=[$11], s1=[$12], s2=[$13], s3=[$14], s11=[$15], s21=[$16], s31=[$17], syear1=[CAST(2001):INTEGER], cnt1=[$18]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$18], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + HiveProject(product_name=[$0], store_name=[$2], store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6], b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10], c_zip=[$11], cnt=[$12], s1=[$13], s2=[$14], s3=[$15], s11=[$20], s21=[$21], s31=[$22], 
cnt1=[$19]) + HiveJoin(condition=[AND(AND(AND(=($1, $16), <=($19, $12)), =($2, $17)), =($3, $18))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$9], $f1=[$8], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$10], $f9=[$11], $f10=[$12], $f11=[$13], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19}], agg#0=[count()], agg#1=[sum($1)], agg#2=[sum($2)], agg#3=[sum($3)]) + HiveJoin(condition=[=($8, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_wholesale_cost=[$2], ss_list_price=[$3], ss_coupon_amt=[$4], d_year0=[$5], d_year1=[$6], s_store_name=[$7], s_zip=[$8], c_current_addr_sk=[$10], ca_street_number=[$13], ca_street_name=[$14], ca_city=[$15], ca_zip=[$16]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($9, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_addr_sk=[$2], ss_wholesale_cost=[$3], ss_list_price=[$4], ss_coupon_amt=[$5], d_year0=[$6], d_year1=[$7], s_store_name=[$8], s_zip=[$9], c_current_hdemo_sk=[$11], c_current_addr_sk=[$12]) + HiveJoin(condition=[AND(=($10, $15), <>($13, $16))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_hdemo_sk=[$3], ss_addr_sk=[$4], ss_wholesale_cost=[$5], ss_list_price=[$6], ss_coupon_amt=[$7], d_year0=[$9], d_year1=[$11], s_store_name=[$12], s_zip=[$13], c_current_cdemo_sk=[$15], c_current_hdemo_sk=[$16], c_current_addr_sk=[$17], cd_marital_status=[$21]) + HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($1, $14), =($19, $8)), =($18, 
$10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_wholesale_cost=[$7], ss_list_price=[$8], ss_coupon_amt=[$9], d_date_sk0=[$16], d_year0=[$17], d_date_sk1=[$14], d_year1=[$15], s_store_name=[$12], s_zip=[$13]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10]) + HiveJoin(condition=[=($1, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $11), =($7, $12))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], 
cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', 
_UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject($f1=[$8], $f2=[$2], $f3=[$3], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19}], agg#0=[count()], agg#1=[sum($1)], agg#2=[sum($2)], agg#3=[sum($3)]) + HiveJoin(condition=[=($8, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_wholesale_cost=[$2], ss_list_price=[$3], ss_coupon_amt=[$4], d_year0=[$5], d_year1=[$6], s_store_name=[$7], s_zip=[$8], c_current_addr_sk=[$10], ca_street_number=[$13], ca_street_name=[$14], ca_city=[$15], ca_zip=[$16]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($9, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_addr_sk=[$2], ss_wholesale_cost=[$3], ss_list_price=[$4], ss_coupon_amt=[$5], d_year0=[$6], d_year1=[$7], s_store_name=[$8], s_zip=[$9], c_current_hdemo_sk=[$11], c_current_addr_sk=[$12]) + HiveJoin(condition=[AND(=($10, $15), <>($13, $16))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_hdemo_sk=[$3], ss_addr_sk=[$4], ss_wholesale_cost=[$5], ss_list_price=[$6], ss_coupon_amt=[$7], d_year0=[$9], d_year1=[$11], s_store_name=[$12], s_zip=[$13], c_current_cdemo_sk=[$15], c_current_hdemo_sk=[$16], c_current_addr_sk=[$17], cd_marital_status=[$21]) + 
HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($1, $14), =($19, $8)), =($18, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_wholesale_cost=[$7], ss_list_price=[$8], ss_coupon_amt=[$9], d_date_sk0=[$16], d_year0=[$17], d_date_sk1=[$14], d_year1=[$15], s_store_name=[$12], s_zip=[$13]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10]) + HiveJoin(condition=[=($1, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $11), =($7, $12))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + 
HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out new file mode 100644 index 0000000000..3e906b73b0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out @@ -0,0 +1,97 @@ +PREHOOK: query: explain cbo +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + s_store_name, + 
i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(s_store_name=[$11], i_item_desc=[$1], revenue=[$7], i_current_price=[$2], i_wholesale_cost=[$3], i_brand=[$4]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4], i_current_price=[$5], i_wholesale_cost=[$6], i_brand=[$8]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), 
IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], *=[*(0.1, /($1, $2))]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out new file mode 100644 index 0000000000..58c5824ffc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out @@ -0,0 +1,506 @@ +PREHOOK: query: explain cbo +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + 
,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then 
ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* 
cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as 
oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@warehouse +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + 
,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net 
+ ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when 
d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and 
cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@warehouse +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county=[$3], w_state=[$4], w_country=[$5], ship_carriers=[CAST(_UTF-16LE'DIAMOND,AIRBORNE'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], year=[CAST(2002):INTEGER], jan_sales=[$6], feb_sales=[$7], mar_sales=[$8], apr_sales=[$9], may_sales=[$10], jun_sales=[$11], jul_sales=[$12], aug_sales=[$13], sep_sales=[$14], oct_sales=[$15], nov_sales=[$16], dec_sales=[$17], jan_sales_per_sq_foot=[$18], feb_sales_per_sq_foot=[$19], mar_sales_per_sq_foot=[$20], apr_sales_per_sq_foot=[$21], may_sales_per_sq_foot=[$22], jun_sales_per_sq_foot=[$23], jul_sales_per_sq_foot=[$24], aug_sales_per_sq_foot=[$25], sep_sales_per_sq_foot=[$26], oct_sales_per_sq_foot=[$27], nov_sales_per_sq_foot=[$28], dec_sales_per_sq_foot=[$29], jan_net=[$30], feb_net=[$31], mar_net=[$32], apr_net=[$33], may_net=[$34], jun_net=[$35], jul_net=[$36], aug_net=[$37], sep_net=[$38], oct_net=[$39], nov_net=[$40], dec_net=[$41]) + HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], 
$f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29], $f30=[$30], $f31=[$31], $f32=[$32], $f33=[$33], $f34=[$34], $f35=[$35], $f36=[$36], $f37=[$37], $f38=[$38], $f39=[$39], $f40=[$40], $f41=[$41]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)], agg#24=[sum($30)], agg#25=[sum($31)], agg#26=[sum($32)], agg#27=[sum($33)], agg#28=[sum($34)], agg#29=[sum($35)], agg#30=[sum($36)], agg#31=[sum($37)], agg#32=[sum($38)], agg#33=[sum($39)], agg#34=[sum($40)], agg#35=[sum($41)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f8=[$6], $f9=[$7], $f10=[$8], $f11=[$9], $f12=[$10], $f13=[$11], $f14=[$12], $f15=[$13], $f16=[$14], $f17=[$15], $f18=[$16], $f19=[$17], $f20=[/($6, CAST($1):DECIMAL(10, 0))], $f21=[/($7, CAST($1):DECIMAL(10, 0))], $f22=[/($8, CAST($1):DECIMAL(10, 0))], $f23=[/($9, CAST($1):DECIMAL(10, 0))], $f24=[/($10, CAST($1):DECIMAL(10, 0))], $f25=[/($11, CAST($1):DECIMAL(10, 0))], $f26=[/($12, CAST($1):DECIMAL(10, 0))], $f27=[/($13, CAST($1):DECIMAL(10, 0))], $f28=[/($14, CAST($1):DECIMAL(10, 0))], $f29=[/($15, CAST($1):DECIMAL(10, 0))], $f30=[/($16, CAST($1):DECIMAL(10, 0))], $f31=[/($17, CAST($1):DECIMAL(10, 0))], $f32=[$18], $f33=[$19], $f34=[$20], $f35=[$21], $f36=[$22], $f37=[$23], $f38=[$24], $f39=[$25], $f40=[$26], $f41=[$27], $f42=[$28], $f43=[$29]) + HiveUnion(all=[true]) + HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], 
w_county=[$3], w_state=[$4], w_country=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) + HiveAggregate(group=[{2, 3, 4, 5, 6, 7}], agg#0=[sum($8)], agg#1=[sum($9)], agg#2=[sum($10)], agg#3=[sum($11)], agg#4=[sum($12)], agg#5=[sum($13)], agg#6=[sum($14)], agg#7=[sum($15)], agg#8=[sum($16)], agg#9=[sum($17)], agg#10=[sum($18)], agg#11=[sum($19)], agg#12=[sum($20)], agg#13=[sum($21)], agg#14=[sum($22)], agg#15=[sum($23)], agg#16=[sum($24)], agg#17=[sum($25)], agg#18=[sum($26)], agg#19=[sum($27)], agg#20=[sum($28)], agg#21=[sum($29)], agg#22=[sum($30)], agg#23=[sum($31)]) + HiveJoin(condition=[=($1, $33)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $32)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_mode_sk=[$2], w_warehouse_name=[$20], w_warehouse_sq_ft=[$21], w_city=[$22], w_county=[$23], w_state=[$24], w_country=[$25], CASE=[CASE($7, $4, 0)], CASE9=[CASE($8, $4, 0)], CASE10=[CASE($9, $4, 0)], CASE11=[CASE($10, $4, 0)], CASE12=[CASE($11, $4, 0)], CASE13=[CASE($12, $4, 0)], CASE14=[CASE($13, $4, 0)], CASE15=[CASE($14, $4, 0)], CASE16=[CASE($15, $4, 0)], CASE17=[CASE($16, $4, 0)], CASE18=[CASE($17, $4, 0)], CASE19=[CASE($18, $4, 0)], CASE20=[CASE($7, $5, 0)], CASE21=[CASE($8, $5, 0)], CASE22=[CASE($9, $5, 0)], CASE23=[CASE($10, $5, 0)], CASE24=[CASE($11, $5, 0)], CASE25=[CASE($12, $5, 0)], CASE26=[CASE($13, $5, 0)], CASE27=[CASE($14, $5, 0)], CASE28=[CASE($15, $5, 0)], CASE29=[CASE($16, $5, 0)], CASE30=[CASE($17, $5, 0)], CASE31=[CASE($18, $5, 0)]) + HiveJoin(condition=[=($3, $19)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], *=[*($21, CAST($18):DECIMAL(10, 0))], *5=[*($30, CAST($18):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($14))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(sm_ship_mode_sk=[$0]) + HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county=[$3], w_state=[$4], w_country=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) + HiveAggregate(group=[{2, 3, 4, 5, 6, 7}], agg#0=[sum($8)], agg#1=[sum($9)], agg#2=[sum($10)], agg#3=[sum($11)], agg#4=[sum($12)], agg#5=[sum($13)], agg#6=[sum($14)], agg#7=[sum($15)], agg#8=[sum($16)], agg#9=[sum($17)], agg#10=[sum($18)], agg#11=[sum($19)], agg#12=[sum($20)], agg#13=[sum($21)], agg#14=[sum($22)], agg#15=[sum($23)], agg#16=[sum($24)], agg#17=[sum($25)], 
agg#18=[sum($26)], agg#19=[sum($27)], agg#20=[sum($28)], agg#21=[sum($29)], agg#22=[sum($30)], agg#23=[sum($31)]) + HiveJoin(condition=[=($1, $33)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $32)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_time_sk=[$1], cs_ship_mode_sk=[$2], w_warehouse_name=[$20], w_warehouse_sq_ft=[$21], w_city=[$22], w_county=[$23], w_state=[$24], w_country=[$25], CASE=[CASE($7, $4, 0)], CASE9=[CASE($8, $4, 0)], CASE10=[CASE($9, $4, 0)], CASE11=[CASE($10, $4, 0)], CASE12=[CASE($11, $4, 0)], CASE13=[CASE($12, $4, 0)], CASE14=[CASE($13, $4, 0)], CASE15=[CASE($14, $4, 0)], CASE16=[CASE($15, $4, 0)], CASE17=[CASE($16, $4, 0)], CASE18=[CASE($17, $4, 0)], CASE19=[CASE($18, $4, 0)], CASE20=[CASE($7, $5, 0)], CASE21=[CASE($8, $5, 0)], CASE22=[CASE($9, $5, 0)], CASE23=[CASE($10, $5, 0)], CASE24=[CASE($11, $5, 0)], CASE25=[CASE($12, $5, 0)], CASE26=[CASE($13, $5, 0)], CASE27=[CASE($14, $5, 0)], CASE28=[CASE($15, $5, 0)], CASE29=[CASE($16, $5, 0)], CASE30=[CASE($17, $5, 0)], CASE31=[CASE($18, $5, 0)]) + HiveJoin(condition=[=($3, $19)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14], *=[*($23, CAST($18):DECIMAL(10, 0))], *5=[*($32, CAST($18):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($13))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + 
HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(sm_ship_mode_sk=[$0]) + HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query67.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query67.q.out new file mode 100644 index 0000000000..41a3896053 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query67.q.out @@ -0,0 +1,118 @@ +PREHOOK: query: explain cbo +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from (select i_category + ,i_class + ,i_brand + 
,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], fetch=[100]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], i_product_name=[$3], d_year=[$4], d_qoy=[$5], d_moy=[$6], s_store_id=[$7], sumsales=[$8], rank_window_0=[$9]) + HiveFilter(condition=[<=($9, 100)]) + HiveProject(i_category=[$2], i_class=[$1], i_brand=[$0], i_product_name=[$3], d_year=[$4], d_qoy=[$6], d_moy=[$5], s_store_id=[$7], sumsales=[$8], rank_window_0=[rank() OVER (PARTITION BY $2 ORDER BY $8 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], i_product_name=[$3], d_year=[$4], d_moy=[$5], d_qoy=[$6], s_store_id=[$7], $f8=[$8]) + HiveAggregate(group=[{1, 2, 3, 4, 6, 7, 8, 9}], groups=[[{1, 2, 3, 4, 6, 7, 8, 9}, {1, 2, 3, 4, 6, 7, 8}, {1, 2, 3, 4, 6, 8}, {1, 2, 3, 4, 6}, 
{1, 2, 3, 4}, {1, 2, 3}, {2, 3}, {3}, {}]], agg#0=[sum($10)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_item_sk=[$1], d_year=[$5], d_moy=[$6], d_qoy=[$7], s_store_id=[$9], CASE=[$3]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], CASE=[CASE(AND(IS NOT NULL($13), IS NOT NULL($10)), *($13, CAST($10):DECIMAL(10, 0)), 0)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8], d_qoy=[$10]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out new file mode 100644 index 0000000000..8053d25bb9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out @@ -0,0 +1,128 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = 
date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + 
,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(c_last_name=[$3], c_first_name=[$2], ca_city=[$5], bought_city=[$8], ss_ticket_number=[$6], extended_price=[$9], extended_tax=[$11], list_price=[$10]) + HiveJoin(condition=[AND(<>($5, $8), =($7, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveTableScan(table=[[default, customer_address]], table:alias=[current_addr]) + HiveProject(ss_ticket_number=[$3], ss_customer_sk=[$1], bought_city=[$0], extended_price=[$4], list_price=[$5], extended_tax=[$6]) + HiveAggregate(group=[{1, 2, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_city=[$6]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_ticket_number=[$5], ss_ext_sales_price=[$6], 
ss_ext_list_price=[$7], ss_ext_tax=[$8]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), BETWEEN(false, $9, 1, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[OR(=($3, 2), =($4, 1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out new file mode 100644 index 0000000000..1eb6a11a2a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out @@ -0,0 +1,155 @@ +PREHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and 
+ d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating 
+ order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(cd_gender=[$0], cd_marital_status=[$1], cd_education_status=[$2], cnt1=[$5], cd_purchase_estimate=[$3], cnt2=[$5], cd_credit_rating=[$4], cnt3=[$5]) + HiveAggregate(group=[{6, 7, 8, 9, 10}], agg#0=[count()]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($0, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], ws_bill_customer_sk0=[$11], $f1=[$12]) + HiveFilter(condition=[IS NULL($12)]) + HiveJoin(condition=[=($0, $11)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $11)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IN($8, _UTF-16LE'CO', _UTF-16LE'IL', _UTF-16LE'MN')]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 1, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out new file mode 100644 index 0000000000..685451cf00 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain cbo +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[/(CAST($1):DOUBLE, $2)], 
$f2=[/($3, $4)], $f3=[/($5, $6)], $f4=[/($7, $8)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)], agg#1=[count($4)], agg#2=[sum($5)], agg#3=[count($5)], agg#4=[sum($7)], agg#5=[count($7)], agg#6=[sum($6)], agg#7=[count($6)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_promo_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'F'), =($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query70.q.out new file mode 100644 index 0000000000..7103b15c15 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query70.q.out 
@@ -0,0 +1,119 @@ +PREHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 
+ where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], s_state=[$0], s_county=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col s_state))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$2]) + HiveJoin(condition=[=($7, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_county=[$23], s_state=[$24]) + 
HiveFilter(condition=[IS NOT NULL($24)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_state=[$0]) + HiveFilter(condition=[<=($1, 5)]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[$1]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $1 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], window_col_0=[$1]) + HiveProject(s_state=[$0], $f1=[$1]) + HiveAggregate(group=[{5}], agg#0=[sum($2)]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[IS NOT NULL($24)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out new file mode 100644 index 0000000000..b5764f1d83 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, 
+ cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by 
ext_price desc, i_brand_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4]) + HiveSortLimit(sort0=[$4], sort1=[$5], dir0=[DESC-nulls-last], dir1=[ASC]) + HiveProject(brand_id=[$2], brand=[$3], t_hour=[$0], t_minute=[$1], ext_price=[$4], (tok_table_or_col i_brand_id)=[$2]) + HiveAggregate(group=[{1, 2, 7, 8}], agg#0=[sum($3)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) + HiveFilter(condition=[IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner')]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + 
HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[=($20, 1)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out new file mode 100644 index 0000000000..1622e1f7d9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out @@ -0,0 +1,127 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on 
(cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 
'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], w_warehouse_name=[$0], d_week_seq=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{2, 3, 4}], agg#0=[count($5)], agg#1=[count($6)], agg#2=[count()]) + HiveProject(cs_item_sk=[$0], cs_order_number=[$2], w_warehouse_name=[$3], i_item_desc=[$4], d_week_seq=[$5], CASE=[CASE(IS NULL($6), 1, 0)], CASE6=[CASE(IS NOT NULL($6), 1, 0)], cr_item_sk=[$7], cr_order_number=[$8]) + HiveJoin(condition=[AND(=($7, $0), =($8, $2))], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$1], cs_promo_sk=[$2], cs_order_number=[$3], w_warehouse_name=[$5], i_item_desc=[$6], d_week_seq=[$7]) + HiveJoin(condition=[AND(=($0, $11), >($12, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($4, $9), =($7, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$1], cs_item_sk=[$3], cs_promo_sk=[$4], cs_order_number=[$5], inv_date_sk=[$6], w_warehouse_name=[$7], i_item_desc=[$8], d_week_seq=[$10], +=[$11]) + HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$1], cs_bill_hdemo_sk=[$3], cs_item_sk=[$4], cs_promo_sk=[$5], cs_order_number=[$6], inv_date_sk=[$7], w_warehouse_name=[$8], i_item_desc=[$11]) + HiveJoin(condition=[=($10, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$1], cs_bill_cdemo_sk=[$2], cs_bill_hdemo_sk=[$3], cs_item_sk=[$4], cs_promo_sk=[$5], cs_order_number=[$6], inv_date_sk=[$8], w_warehouse_name=[$13]) + HiveJoin(condition=[AND(=($4, $9), <($11, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'M')]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + 
HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'1001-5000')]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(p_promo_sk=[$0]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out new file mode 100644 index 0000000000..f5b615bbf4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out @@ -0,0 +1,93 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) 
dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) + HiveProject(c_last_name=[$3], c_first_name=[$2], c_salutation=[$1], c_preferred_cust_flag=[$4], ss_ticket_number=[$5], cnt=[$7]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], 
c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveFilter(condition=[BETWEEN(false, $2, 1, 5)]) + HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{0, 2}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_ticket_number=[$4]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), BETWEEN(false, $9, 1, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), null))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out new file mode 100644 index 0000000000..2c61f06f7b --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: 
default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(customer_id=[$3], customer_first_name=[$4], customer_last_name=[$5]) + HiveJoin(condition=[AND(=($2, $0), CASE($7, CASE($8, >(/($1, $6), $9), $10), CASE($8, >(/($1, $6), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$4], customer_id0=[$0], customer_first_name0=[$1], customer_last_name0=[$2], year_total1=[$8], CAST=[$6], CAST13=[$9], /=[/($3, $5)], >=[>(null, /($3, $5))]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], 
table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) + HiveFilter(condition=[>($3, 0)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) + HiveFilter(condition=[>($3, 0)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out new file mode 100644 index 0000000000..8e221dbd71 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON 
i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE 
i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: 
default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], sales_amt_diff=[$7]) + HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$11], sales_cnt_diff=[-($11, $4)], sales_amt_diff=[-($12, $5)]) + HiveJoin(condition=[AND(AND(AND(AND(=($7, $0), =($8, $1)), =($9, $2)), =($10, $3)), <(/($13, $6), 0.9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f5=[$4], $f6=[$5], CAST=[CAST($4):DECIMAL(17, 2)]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$8], i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveTableScan(table=[[default, 
catalog_returns]], table:alias=[catalog_returns]) + HiveProject(cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$8], i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], 
ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$8], i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT 
NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f5=[$4], $f6=[$5], CAST=[CAST($4):DECIMAL(17, 2)]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$8], i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], 
table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$8], i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$8], 
i_class_id=[$9], i_category_id=[$10], i_manufact_id=[$11], sales_cnt=[-($6, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($7, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($5, $1), =($4, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query76.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query76.q.out new file mode 100644 index 0000000000..1f8c55ad4f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query76.q.out @@ -0,0 +1,95 @@ +PREHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, 
i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category 
+ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 2, 3, 4}], agg#0=[count()], agg#1=[sum($5)]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], ext_sales_price=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store'], col_name=[_UTF-16LE'ss_addr_sk'], d_year=[$1], d_qoy=[$2], i_category=[$4], ext_sales_price=[$7]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(channel=[_UTF-16LE'web'], col_name=[_UTF-16LE'ws_web_page_sk'], d_year=[$6], d_qoy=[$7], i_category=[$4], ext_sales_price=[$2]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS 
NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], col_name=[_UTF-16LE'cs_warehouse_sk'], d_year=[$6], d_qoy=[$7], i_category=[$4], ext_sales_price=[$2]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NULL($14), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out new file mode 100644 index 0000000000..0ac295bf8f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out @@ -0,0 +1,304 @@ +Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +PREHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as 
profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as 
channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) 
as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], 
agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject(ss_store_sk=[CAST($1):INTEGER NOT NULL], ss_ext_sales_price=[$2], ss_net_profit=[$3]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sr_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject(sr_store_sk=[CAST($1):INTEGER NOT NULL], sr_return_amt=[$2], sr_net_loss=[$3]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$7], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[$0], sales=[$1], returns=[$3], profit=[-($2, 
$4)]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_call_center_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject(ws_web_page_sk=[CAST($1):INTEGER NOT NULL], ws_ext_sales_price=[$2], ws_net_profit=[$3]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_web_page_sk=[$12], ws_ext_sales_price=[$23], 
ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveProject(wr_web_page_sk=[CAST($1):INTEGER NOT NULL], wr_return_amt=[$2], wr_net_loss=[$3]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$11], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out new file mode 100644 index 0000000000..de1b133006 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out @@ -0,0 +1,183 @@ +PREHOOK: query: explain cbo +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + 
sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws as + (select d_year AS 
ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + 
other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(ss_sold_year=[CAST(2000):INTEGER], ss_item_sk=[$0], ss_customer_sk=[$1], ratio=[$2], store_qty=[$3], store_wholesale_cost=[$4], store_sales_price=[$5], other_chan_qty=[$6], other_chan_wholesale_cost=[$7], other_chan_sales_price=[$8]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$9], sort3=[$10], sort4=[$11], sort5=[$6], sort6=[$7], sort7=[$8], sort8=[$12], dir0=[ASC], dir1=[ASC], dir2=[DESC-nulls-last], dir3=[DESC-nulls-last], dir4=[DESC-nulls-last], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ratio=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)], store_qty=[$2], store_wholesale_cost=[$3], store_sales_price=[$4], other_chan_qty=[+(CASE(IS NOT NULL($7), $7, 0), CASE(IS NOT NULL($11), $11, 0))], other_chan_wholesale_cost=[+(CASE(IS NOT NULL($8), $8, 0), CASE(IS NOT NULL($12), $12, 0))], other_chan_sales_price=[+(CASE(IS NOT NULL($9), $9, 0), CASE(IS NOT NULL($13), $13, 0))], ss_qty=[$2], ss_wc=[$3], ss_sp=[$4], (tok_function round (/ (tok_table_or_col ss_qty) (tok_function coalesce (+ (tok_table_or_col ws_qty) (tok_table_or_col cs_qty)) 1)) 2)=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)]) + HiveFilter(condition=[CASE(IS NOT NULL($11), >($11, 0), false)]) + HiveJoin(condition=[=($10, $1)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4], 
ws_item_sk=[$5], ws_bill_customer_sk=[$6], $f20=[$7], $f30=[$8], $f40=[$9]) + HiveFilter(condition=[CASE(IS NOT NULL($7), >($7, 0), false)]) + HiveJoin(condition=[AND(=($5, $0), =($6, $1))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{2, 3}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_quantity=[$4], ss_wholesale_cost=[$5], ss_sales_price=[$6]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($1, $7))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_wholesale_cost=[$11], ss_sales_price=[$13]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{2, 3}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_quantity=[$4], ws_wholesale_cost=[$5], ws_sales_price=[$6]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($1, $7))], joinType=[left], 
algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_order_number=[$17], ws_quantity=[$18], ws_wholesale_cost=[$19], ws_sales_price=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject($f2=[$0], $f3=[$2], $f4=[$3], $f5=[$4]) + HiveAggregate(group=[{2, 3}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_quantity=[$4], cs_wholesale_cost=[$5], cs_sales_price=[$6]) + HiveFilter(condition=[IS NULL($8)]) + HiveJoin(condition=[AND(=($8, $3), =($2, $7))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_wholesale_cost=[$19], cs_sales_price=[$21]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query79.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query79.q.out new file mode 100644 index 0000000000..f567150397 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query79.q.out @@ -0,0 +1,82 @@ +PREHOOK: query: explain cbo +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select 
ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: 
default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(c_last_name=[$0], c_first_name=[$1], _o__c2=[$2], ss_ticket_number=[$3], amt=[$4], profit=[$5]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$6], sort3=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[$8], ss_ticket_number=[$3], amt=[$6], profit=[$7], (tok_function substr (tok_table_or_col s_city) 1 30)=[substr($5, 1, 30)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], s_city=[$3], amt=[$4], profit=[$5], substr=[substr($3, 1, 30)]) + HiveAggregate(group=[{0, 2, 3, 6}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_ticket_number=[$5], ss_coupon_amt=[$6], ss_net_profit=[$7], s_city=[$10]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), =($7, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_city=[$22]) + 
HiveFilter(condition=[BETWEEN(false, $6, 200, 295)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[OR(=($3, 8), >($4, 0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out new file mode 100644 index 0000000000..d13e1e85d8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out @@ -0,0 +1,266 @@ +PREHOOK: query: explain cbo +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + 
'78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + 
'80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + 
'70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + 
'88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(s_store_name=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($0)]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_net_profit=[$2], s_store_name=[$5], substr=[$6]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], 
ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2002))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], substr=[substr($25, 1, 2)]) + HiveFilter(condition=[IS NOT NULL(substr($25, 1, 2))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(substr=[substr($0, 1, 2)]) + HiveFilter(condition=[=($1, 2)]) + HiveAggregate(group=[{0}], agg#0=[count($1)]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(ca_zip=[substr($9, 1, 5)]) + HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436', _UTF-16LE'30868', _UTF-16LE'65085', _UTF-16LE'22977', _UTF-16LE'83927', _UTF-16LE'77557', _UTF-16LE'58429', _UTF-16LE'40697', _UTF-16LE'80614', _UTF-16LE'10502', _UTF-16LE'32779', _UTF-16LE'91137', _UTF-16LE'61265', _UTF-16LE'98294', _UTF-16LE'17921', _UTF-16LE'18427', _UTF-16LE'21203', _UTF-16LE'59362', _UTF-16LE'87291', _UTF-16LE'84093', _UTF-16LE'21505', _UTF-16LE'17184', _UTF-16LE'10866', _UTF-16LE'67898', _UTF-16LE'25797', _UTF-16LE'28055', _UTF-16LE'18377', _UTF-16LE'80332', _UTF-16LE'74535', _UTF-16LE'21757', _UTF-16LE'29742', _UTF-16LE'90885', _UTF-16LE'29898', _UTF-16LE'17819', _UTF-16LE'40811', _UTF-16LE'25990', _UTF-16LE'47513', _UTF-16LE'89531', _UTF-16LE'91068', _UTF-16LE'10391', _UTF-16LE'18846', _UTF-16LE'99223', _UTF-16LE'82637', _UTF-16LE'41368', _UTF-16LE'83658', _UTF-16LE'86199', _UTF-16LE'81625', _UTF-16LE'26696', _UTF-16LE'89338', _UTF-16LE'88425', _UTF-16LE'32200', _UTF-16LE'81427', _UTF-16LE'19053', _UTF-16LE'77471', _UTF-16LE'36610', _UTF-16LE'99823', _UTF-16LE'43276', _UTF-16LE'41249', _UTF-16LE'48584', _UTF-16LE'83550', _UTF-16LE'82276', 
_UTF-16LE'18842', _UTF-16LE'78890', _UTF-16LE'14090', _UTF-16LE'38123', _UTF-16LE'40936', _UTF-16LE'34425', _UTF-16LE'19850', _UTF-16LE'43286', _UTF-16LE'80072', _UTF-16LE'79188', _UTF-16LE'54191', _UTF-16LE'11395', _UTF-16LE'50497', _UTF-16LE'84861', _UTF-16LE'90733', _UTF-16LE'21068', _UTF-16LE'57666', _UTF-16LE'37119', _UTF-16LE'25004', _UTF-16LE'57835', _UTF-16LE'70067', _UTF-16LE'62878', _UTF-16LE'95806', _UTF-16LE'19303', _UTF-16LE'18840', _UTF-16LE'19124', _UTF-16LE'29785', _UTF-16LE'16737', _UTF-16LE'16022', _UTF-16LE'49613', _UTF-16LE'89977', _UTF-16LE'68310', _UTF-16LE'60069', _UTF-16LE'98360', _UTF-16LE'48649', _UTF-16LE'39050', _UTF-16LE'41793', _UTF-16LE'25002', _UTF-16LE'27413', _UTF-16LE'39736', _UTF-16LE'47208', _UTF-16LE'16515', _UTF-16LE'94808', _UTF-16LE'57648', _UTF-16LE'15009', _UTF-16LE'80015', _UTF-16LE'42961', _UTF-16LE'63982', _UTF-16LE'21744', _UTF-16LE'71853', _UTF-16LE'81087', _UTF-16LE'67468', _UTF-16LE'34175', _UTF-16LE'64008', _UTF-16LE'20261', _UTF-16LE'11201', _UTF-16LE'51799', _UTF-16LE'48043', _UTF-16LE'45645', _UTF-16LE'61163', _UTF-16LE'48375', _UTF-16LE'36447', _UTF-16LE'57042', _UTF-16LE'21218', _UTF-16LE'41100', _UTF-16LE'89951', _UTF-16LE'22745', _UTF-16LE'35851', _UTF-16LE'83326', _UTF-16LE'61125', _UTF-16LE'78298', _UTF-16LE'80752', _UTF-16LE'49858', _UTF-16LE'52940', _UTF-16LE'96976', _UTF-16LE'63792', _UTF-16LE'11376', _UTF-16LE'53582', _UTF-16LE'18717', _UTF-16LE'90226', _UTF-16LE'50530', _UTF-16LE'94203', _UTF-16LE'99447', _UTF-16LE'27670', _UTF-16LE'96577', _UTF-16LE'57856', _UTF-16LE'56372', _UTF-16LE'16165', _UTF-16LE'23427', _UTF-16LE'54561', _UTF-16LE'28806', _UTF-16LE'44439', _UTF-16LE'22926', _UTF-16LE'30123', _UTF-16LE'61451', _UTF-16LE'92397', _UTF-16LE'56979', _UTF-16LE'92309', _UTF-16LE'70873', _UTF-16LE'13355', _UTF-16LE'21801', _UTF-16LE'46346', _UTF-16LE'37562', _UTF-16LE'56458', _UTF-16LE'28286', _UTF-16LE'47306', _UTF-16LE'99555', _UTF-16LE'69399', _UTF-16LE'26234', _UTF-16LE'47546', _UTF-16LE'49661', 
_UTF-16LE'88601', _UTF-16LE'35943', _UTF-16LE'39936', _UTF-16LE'25632', _UTF-16LE'24611', _UTF-16LE'44166', _UTF-16LE'56648', _UTF-16LE'30379', _UTF-16LE'59785', _UTF-16LE'11110', _UTF-16LE'14329', _UTF-16LE'93815', _UTF-16LE'52226', _UTF-16LE'71381', _UTF-16LE'13842', _UTF-16LE'25612', _UTF-16LE'63294', _UTF-16LE'14664', _UTF-16LE'21077', _UTF-16LE'82626', _UTF-16LE'18799', _UTF-16LE'60915', _UTF-16LE'81020', _UTF-16LE'56447', _UTF-16LE'76619', _UTF-16LE'11433', _UTF-16LE'13414', _UTF-16LE'42548', _UTF-16LE'92713', _UTF-16LE'70467', _UTF-16LE'30884', _UTF-16LE'47484', _UTF-16LE'16072', _UTF-16LE'38936', _UTF-16LE'13036', _UTF-16LE'88376', _UTF-16LE'45539', _UTF-16LE'35901', _UTF-16LE'19506', _UTF-16LE'65690', _UTF-16LE'73957', _UTF-16LE'71850', _UTF-16LE'49231', _UTF-16LE'14276', _UTF-16LE'20005', _UTF-16LE'18384', _UTF-16LE'76615', _UTF-16LE'11635', _UTF-16LE'38177', _UTF-16LE'55607', _UTF-16LE'41369', _UTF-16LE'95447', _UTF-16LE'58581', _UTF-16LE'58149', _UTF-16LE'91946', _UTF-16LE'33790', _UTF-16LE'76232', _UTF-16LE'75692', _UTF-16LE'95464', _UTF-16LE'22246', _UTF-16LE'51061', _UTF-16LE'56692', _UTF-16LE'53121', _UTF-16LE'77209', _UTF-16LE'15482', _UTF-16LE'10688', _UTF-16LE'14868', _UTF-16LE'45907', _UTF-16LE'73520', _UTF-16LE'72666', _UTF-16LE'25734', _UTF-16LE'17959', _UTF-16LE'24677', _UTF-16LE'66446', _UTF-16LE'94627', _UTF-16LE'53535', _UTF-16LE'15560', _UTF-16LE'41967', _UTF-16LE'69297', _UTF-16LE'11929', _UTF-16LE'59403', _UTF-16LE'33283', _UTF-16LE'52232', _UTF-16LE'57350', _UTF-16LE'43933', _UTF-16LE'40921', _UTF-16LE'36635', _UTF-16LE'10827', _UTF-16LE'71286', _UTF-16LE'19736', _UTF-16LE'80619', _UTF-16LE'25251', _UTF-16LE'95042', _UTF-16LE'15526', _UTF-16LE'36496', _UTF-16LE'55854', _UTF-16LE'49124', _UTF-16LE'81980', _UTF-16LE'35375', _UTF-16LE'49157', _UTF-16LE'63512', _UTF-16LE'28944', _UTF-16LE'14946', _UTF-16LE'36503', _UTF-16LE'54010', _UTF-16LE'18767', _UTF-16LE'23969', _UTF-16LE'43905', _UTF-16LE'66979', _UTF-16LE'33113', _UTF-16LE'21286', 
_UTF-16LE'58471', _UTF-16LE'59080', _UTF-16LE'13395', _UTF-16LE'79144', _UTF-16LE'70373', _UTF-16LE'67031', _UTF-16LE'38360', _UTF-16LE'26705', _UTF-16LE'50906', _UTF-16LE'52406', _UTF-16LE'26066', _UTF-16LE'73146', _UTF-16LE'15884', _UTF-16LE'31897', _UTF-16LE'30045', _UTF-16LE'61068', _UTF-16LE'45550', _UTF-16LE'92454', _UTF-16LE'13376', _UTF-16LE'14354', _UTF-16LE'19770', _UTF-16LE'22928', _UTF-16LE'97790', _UTF-16LE'50723', _UTF-16LE'46081', _UTF-16LE'30202', _UTF-16LE'14410', _UTF-16LE'20223', _UTF-16LE'88500', _UTF-16LE'67298', _UTF-16LE'13261', _UTF-16LE'14172', _UTF-16LE'81410', _UTF-16LE'93578', _UTF-16LE'83583', _UTF-16LE'46047', _UTF-16LE'94167', _UTF-16LE'82564', _UTF-16LE'21156', _UTF-16LE'15799', _UTF-16LE'86709', _UTF-16LE'37931', _UTF-16LE'74703', _UTF-16LE'83103', _UTF-16LE'23054', _UTF-16LE'70470', _UTF-16LE'72008', _UTF-16LE'49247', _UTF-16LE'91911', _UTF-16LE'69998', _UTF-16LE'20961', _UTF-16LE'70070', _UTF-16LE'63197', _UTF-16LE'54853', _UTF-16LE'88191', _UTF-16LE'91830', _UTF-16LE'49521', _UTF-16LE'19454', _UTF-16LE'81450', _UTF-16LE'89091', _UTF-16LE'62378', _UTF-16LE'25683', _UTF-16LE'61869', _UTF-16LE'51744', _UTF-16LE'36580', _UTF-16LE'85778', _UTF-16LE'36871', _UTF-16LE'48121', _UTF-16LE'28810', _UTF-16LE'83712', _UTF-16LE'45486', _UTF-16LE'67393', _UTF-16LE'26935', _UTF-16LE'42393', _UTF-16LE'20132', _UTF-16LE'55349', _UTF-16LE'86057', _UTF-16LE'21309', _UTF-16LE'80218', _UTF-16LE'10094', _UTF-16LE'11357', _UTF-16LE'48819', _UTF-16LE'39734', _UTF-16LE'40758', _UTF-16LE'30432', _UTF-16LE'21204', _UTF-16LE'29467', _UTF-16LE'30214', _UTF-16LE'61024', _UTF-16LE'55307', _UTF-16LE'74621', _UTF-16LE'11622', _UTF-16LE'68908', _UTF-16LE'33032', _UTF-16LE'52868', _UTF-16LE'99194', _UTF-16LE'99900', _UTF-16LE'84936', _UTF-16LE'69036', _UTF-16LE'99149', _UTF-16LE'45013', _UTF-16LE'32895', _UTF-16LE'59004', _UTF-16LE'32322', _UTF-16LE'14933', _UTF-16LE'32936', _UTF-16LE'33562', _UTF-16LE'72550', _UTF-16LE'27385', _UTF-16LE'58049', _UTF-16LE'58200', 
_UTF-16LE'16808', _UTF-16LE'21360', _UTF-16LE'32961', _UTF-16LE'18586', _UTF-16LE'79307', _UTF-16LE'15492'), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ca_zip=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject(ca_zip=[substr($0, 1, 5)]) + HiveFilter(condition=[>($1, 10)]) + HiveAggregate(group=[{1}], agg#0=[count()]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL(substr(substr($9, 1, 5), 1, 2))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(=($10, _UTF-16LE'Y'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out new file mode 100644 index 0000000000..23f1c70b3a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: explain cbo +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + 
sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim 
+PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + 
web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[||(_UTF-16LE'store', $0)], sales=[$1], returns=[$2], profit=[$3]) + HiveAggregate(group=[{2}], agg#0=[sum($1)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveJoin(condition=[=($0, $5)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_promo_sk=[$3], ss_ext_sales_price=[$4], s_store_id=[$8], CASE=[$5], -=[$6]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_item_sk=[$1], ss_store_sk=[$2], ss_promo_sk=[$3], ss_ext_sales_price=[$5], CASE=[CASE(IS NOT NULL($9), $9, 0)], -=[-($6, CASE(IS NOT NULL($10), $10, 0))]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$2], profit=[$3]) + HiveAggregate(group=[{2}], agg#0=[sum($1)], agg#1=[sum($3)], agg#2=[sum($4)]) + 
HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_promo_sk=[$5], cs_ext_sales_price=[$6], cp_catalog_page_id=[$1], CASE=[$7], -=[$8]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_catalog_page_sk=[$1], cs_item_sk=[$2], cs_promo_sk=[$3], cs_ext_sales_price=[$5], CASE=[CASE(IS NOT NULL($9), $9, 0)], -=[-($6, CASE(IS NOT NULL($10), $10, 0))]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$2], 
profit=[$3]) + HiveAggregate(group=[{3}], agg#0=[sum($2)], agg#1=[sum($4)], agg#2=[sum($5)]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(ws_promo_sk=[$3], ws_ext_sales_price=[$4], web_site_id=[$8], CASE=[$5], -=[$6]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_item_sk=[$1], ws_web_site_sk=[$2], ws_promo_sk=[$3], ws_ext_sales_price=[$5], CASE=[CASE(IS NOT NULL($9), $9, 0)], -=[-($6, CASE(IS NOT NULL($10), $10, 0))]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($13), IS NOT NULL($16))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out new file mode 100644 index 0000000000..98066643bf --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out @@ -0,0 +1,113 @@ +PREHOOK: query: explain cbo +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year 
=1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_name=[$3], ca_street_number=[$4], ca_street_name=[$5], ca_street_type=[$6], ca_suite_number=[$7], ca_city=[$8], ca_county=[$9], ca_state=[CAST(_UTF-16LE'IL'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], ca_zip=[$10], ca_country=[$11], ca_gmt_offset=[$12], ca_location_type=[$13], ctr_total_return=[$14]) + HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], sort11=[$11], sort12=[$12], sort13=[$13], sort14=[$14], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], dir10=[ASC], dir11=[ASC], dir12=[ASC], dir13=[ASC], dir14=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1], c_salutation=[$3], c_first_name=[$4], 
c_last_name=[$5], ca_street_number=[$7], ca_street_name=[$8], ca_street_type=[$9], ca_suite_number=[$10], ca_city=[$11], ca_county=[$12], ca_zip=[$13], ca_country=[$14], ca_gmt_offset=[$15], ca_location_type=[$16], ctr_total_return=[$19]) + HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_current_addr_sk=[$4], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12]) + HiveFilter(condition=[=($8, _UTF-16LE'IL')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10), IS NOT NULL($7))]) + HiveTableScan(table=[[default, catalog_returns]], 
table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_state=[$0]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ca_state=[$0], cr_returning_customer_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query82.q.out new file mode 100644 index 0000000000..5efbaaced8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query82.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + 
order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) + HiveAggregate(group=[{2, 3, 4}]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$3], i_item_id=[$4], i_item_desc=[$5], i_current_price=[$6]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) + HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00, 2002-07-29 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + 
HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) + HiveFilter(condition=[AND(IN($13, 437, 129, 727, 663), BETWEEN(false, $5, 30, 60))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out new file mode 100644 index 0000000000..ee94ea3f99 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + 
,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 
wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_id=[$0], sr_item_qty=[$4], sr_dev=[*(/(/($5, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], cr_item_qty=[$1], cr_dev=[*(/(/($2, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], wr_item_qty=[$7], wr_dev=[*(/(/($8, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], average=[/(CAST(+(+($4, $1), $7)):DECIMAL(19, 0), 3)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(item_id=[$0], cr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_item_sk=[$2], cr_return_quantity=[$17]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT 
NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], sr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_return_quantity=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS 
NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], wr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_return_quantity=[$14]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS 
NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out new file mode 100644 index 0000000000..5675d749a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out @@ -0,0 +1,83 @@ +PREHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + 
and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(customer_id=[$0], customername=[$1]) + HiveSortLimit(sort0=[$2], dir0=[ASC], fetch=[100]) + HiveProject(customer_id=[$1], customername=[$3], c_customer_id=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_cdemo_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(c_customer_id=[$0], cd_demo_sk=[$2], ||=[$3]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$1], c_current_hdemo_sk=[$3], cd_demo_sk=[$0], ||=[$5]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], 
algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$1], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], ||=[||(||($9, _UTF-16LE', '), $8)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($6, _UTF-16LE'Hopewell')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[AND(>=($1, 32287), <=($2, 82287))]) + HiveTableScan(table=[[default, income_band]], table:alias=[income_band]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out new file mode 100644 index 0000000000..37259673ee --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = 
wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and 
ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) + HiveSortLimit(sort0=[$7], 
sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) + HiveAggregate(group=[{1}], agg#0=[sum($4)], agg#1=[count($4)], agg#2=[sum($7)], agg#3=[count($7)], agg#4=[sum($6)], agg#5=[count($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$7], ws_quantity=[$8], wr_reason_sk=[$11], wr_fee=[$12], wr_refunded_cash=[$13]) + HiveJoin(condition=[AND(=($0, $9), OR(AND($1, $16), AND($2, $17), AND($3, $18)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[AND(AND(=($0, $6), =($10, $1)), =($11, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], 
cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(ws_sold_date_sk=[$17], ws_quantity=[$20], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_fee=[$15], wr_refunded_cash=[$16], cd_marital_status=[$1], cd_education_status=[$2], BETWEEN=[$21], BETWEEN10=[$22], BETWEEN11=[$23]) + HiveJoin(condition=[AND(=($0, $10), OR(AND($3, $4, $24), AND($5, $6, $25), AND($7, $8, $26)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[AND(=($9, $0), =($10, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], 
BETWEEN10=[BETWEEN(false, $21, 150, 200)]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $21, 100, 150), BETWEEN(false, $21, 50, 100), BETWEEN(false, $21, 150, 200)), OR(BETWEEN(false, $33, 100, 200), BETWEEN(false, $33, 150, 300), BETWEEN(false, $33, 50, 250)), IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out new file mode 100644 index 0000000000..8e89983463 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk 
+ group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], i_category=[$0], i_class=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col i_category))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$1], $f2=[$5]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_net_paid=[$29]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out new file mode 100644 index 0000000000..97c9eddc34 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out @@ -0,0 +1,111 @@ +PREHOOK: query: explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer 
+ where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + 
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out new file mode 100644 index 0000000000..0297a86b79 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out @@ -0,0 +1,355 @@ +Warning: Shuffle Join MERGEJOIN[607][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[608][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[609][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[610][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[611][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[612][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[613][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = 
household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and 
household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + 
(household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + 
(household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and 
household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[$0], $f00=[$7], $f01=[$6], $f02=[$5], $f03=[$4], $f04=[$3], $f05=[$2], $f06=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, 
store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 12), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 11), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + 
HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 11), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 10), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], 
table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 10), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + 
HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 9), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 9), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out new file mode 100644 index 0000000000..1dd2c09c3b --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - 
avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)]) + HiveFilter(condition=[CASE(<>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1), null)]) + HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 2, 5, 7, 8}], agg#0=[sum($4)]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand=[$5], i_class=[$6], i_category=[$7], ss_store_sk=[$2], ss_sales_price=[$3], d_moy=[$9]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], 
ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical', _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'), IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics', _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical')), AND(IN($12, _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), IN($10, _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'))))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_moy=[$8]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out new file mode 100644 index 0000000000..37cae20f5e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out @@ -0,0 +1,205 @@ +Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, 
$hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[186][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[187][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[188][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[190][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from 
store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then 
(select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(bucket1=[$1], bucket2=[$2], bucket3=[$3], bucket4=[$4], bucket5=[CASE($5, $0, $6)]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(_o__c012=[$7], CASE=[$1], CASE2=[$2], CASE3=[$3], CASE4=[CASE($4, $0, $5)], >=[$6]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(_o__c09=[$6], CASE=[$1], CASE2=[$2], CASE3=[CASE($3, $0, $4)], >=[$5]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(_o__c06=[$5], CASE=[$1], CASE2=[CASE($2, $0, $3)], >=[$4]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(_o__c03=[$4], CASE=[CASE($1, $0, $2)], >=[$3]) + HiveJoin(condition=[true], 
joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(_o__c00=[$2], >=[$1]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[1]) + HiveFilter(condition=[=($0, 1)]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject(>=[>($0, 409437)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 4595804)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 7887297)]) + HiveAggregate(group=[{}], agg#0=[count()]) + 
HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 10872978)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 43571537)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out new file mode 100644 index 0000000000..11ff97cf95 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out @@ -0,0 +1,94 @@ +Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, 
household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(am_pm_ratio=[/($0, $1)]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_web_page_sk=[$2]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 6, 7)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 8)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveAggregate(group=[{}], agg#0=[count()]) + 
HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_web_page_sk=[$2]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 14, 15)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 8)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out new file mode 100644 index 0000000000..a40880a21a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out @@ -0,0 +1,110 @@ +PREHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + 
or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss=[$3]) + HiveSortLimit(sort0=[$4], dir0=[DESC-nulls-last]) + 
HiveProject(call_center=[$2], call_center_name=[$3], manager=[$4], returns_loss=[$5], (tok_function sum (tok_table_or_col cr_net_loss))=[$5]) + HiveAggregate(group=[{2, 3, 4, 5, 6}], agg#0=[sum($7)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[LIKE($2, _UTF-16LE'0-500%')]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(cc_call_center_id=[$5], cc_name=[$6], cc_manager=[$7], cr_net_loss=[$9], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cc_call_center_id=[$6], cc_name=[$7], cc_manager=[$8], cr_returning_customer_sk=[$1], cr_net_loss=[$3]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_call_center_sk=[$11], cr_net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_call_center_id=[$1], cc_name=[$6], cc_manager=[$11]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out new file mode 100644 index 0000000000..16098d7243 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk 
+and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(excess discount amount=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(excess discount amount=[$0], (tok_function sum (tok_table_or_col ws_ext_discount_amt))=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[AND(>($2, $5), =($6, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_item_sk=[$0], CAST3=[$1], i_item_sk=[$2]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$0], CAST3=[CAST(*(1.3, /($1, $2))):DECIMAL(14, 7)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 
1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out new file mode 100644 index 0000000000..efbb525669 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns 
+POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($2)]) + HiveProject(ss_customer_sk=[$1], sr_reason_sk=[$6], CASE=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))], r_reason_sk=[$9]) + HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'Did not like the warranty')]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out new file mode 100644 index 0000000000..198778f8e7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = 
web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], 
agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($15)]) + HiveTableScan(table=[[default, 
web_sales]], table:alias=[ws2]) + HiveProject(wr_order_number0=[$13], $f1=[true]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr1]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out new file mode 100644 index 0000000000..0546f1ce5e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as 
`order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$14]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + 
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0]) + HiveFilter(condition=[=($14, _UTF-16LE'pri')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out new file mode 100644 index 0000000000..6d58fdaaab --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: explain cbo +select count(*) +from store_sales + 
,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], (tok_functionstar count)=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$2]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30))]) + 
HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 5)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out new file mode 100644 index 0000000000..6151627c83 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 
1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[sum($1)], agg#2=[sum($2)]) + HiveProject($f0=[CASE(AND(IS NOT NULL($0), IS NULL($2)), 1, 0)], $f1=[CASE(AND(IS NULL($0), IS NOT NULL($2)), 1, 0)], $f2=[CASE(AND(IS NOT NULL($0), IS NOT NULL($2)), 1, 0)]) + HiveJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_item_sk=[$0]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_bill_customer_sk=[$0], cs_item_sk=[$1]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out new file mode 100644 index 0000000000..2a6b66f2bf --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as 
date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out new file mode 100644 index 0000000000..2318274cbf --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out @@ -0,0 +1,103 @@ +PREHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + 
(cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(_o__c0=[$1], sm_type=[$0], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7], (tok_function substr (tok_table_or_col w_warehouse_name) 1 20)=[$1]) + HiveAggregate(group=[{2, 3, 11}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)], agg#3=[sum($7)], agg#4=[sum($8)]) + HiveJoin(condition=[=($1, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$0], cs_call_center_sk=[$1], sm_type=[$10], substr=[$12], CASE=[$4], CASE5=[$5], CASE6=[$6], CASE7=[$7], CASE8=[$8]) + HiveJoin(condition=[=($3, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_call_center_sk=[$11], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14], CASE=[CASE(<=(-($2, $0), 30), 1, 0)], CASE5=[CASE(AND(>(-($2, $0), 30), <=(-($2, $0), 60)), 1, 0)], CASE6=[CASE(AND(>(-($2, $0), 60), <=(-($2, $0), 90)), 1, 0)], CASE7=[CASE(AND(>(-($2, $0), 90), <=(-($2, $0), 120)), 1, 0)], CASE8=[CASE(>(-($2, $0), 120), 1, 0)]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sm_ship_mode_sk=[$0], sm_type=[$2]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(w_warehouse_sk=[$0], substr=[substr($2, 1, 20)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out new file mode 100644 index 0000000000..69f93290d9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = 
ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_159] + Limit [LIM_158] (rows=100 width=100) + Number of rows:100 + Select Operator [SEL_157] (rows=816091 width=100) + Output:["_col0"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_49] + Select Operator [SEL_48] (rows=816091 width=100) + Output:["_col0"] + Filter Operator [FIL_47] (rows=816091 width=324) + predicate:(_col2 > _col6) + Merge Join Operator [MERGEJOIN_133] (rows=2448274 width=324) + Conds:RS_44._col1=RS_156._col1(Inner),Output:["_col2","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_131] (rows=2369298 width=213) + Conds:RS_41._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=80000000 width=104) + Output:["_col0","_col1"] + TableScan [TS_17] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_130] (rows=2369298 width=114) + Conds:RS_146._col1=RS_149._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=35 width=4) + Output:["_col0"] + Filter Operator [FIL_147] (rows=35 width=90) + predicate:(s_state = 'NM') + TableScan [TS_14] (rows=1704 width=90) + 
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + PartitionCols:_col1 + Select Operator [SEL_145] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_144] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=16855704 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_129] (rows=16855704 width=107) + Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=51757026 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_134] (rows=51757026 width=119) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) + TableScan [TS_0] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] + PartitionCols:_col0 + Select Operator [SEL_141] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_140] (rows=652 width=8) + predicate:(d_year = 2000) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col1 + Select Operator [SEL_155] (rows=31 width=115) + Output:["_col0","_col1"] + Group By Operator [GBY_154] (rows=31 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_153] (rows=14291868 width=119) + Output:["_col1","_col2"] + Group By Operator 
[GBY_152] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=17467258 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_132] (rows=17467258 width=107) + Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=53634860 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_135] (rows=53634860 width=119) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) + Please refer to the previous TableScan [TS_0] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_141] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out new file mode 100644 index 0000000000..3fbd92878e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out @@ -0,0 +1,379 @@ +PREHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 
4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * 
+ from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) 
+Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_235] + Limit [LIM_234] (rows=1 width=419) + Number of rows:100 + Select Operator [SEL_233] (rows=1 width=419) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + Select Operator [SEL_231] (rows=1 width=419) + Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] + Group By Operator [GBY_230] (rows=1 width=379) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Group By Operator [GBY_65] (rows=1 width=379) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Top N Key Operator [TNK_102] (rows=58 width=379) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 + Select Operator [SEL_64] (rows=58 width=379) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_63] (rows=58 width=379) + predicate:(_col15 is not null or _col17 is not null) + Merge Join Operator [MERGEJOIN_180] (rows=58 width=379) + Conds:RS_60._col0=RS_229._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_60] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_179] (rows=58 width=379) + Conds:RS_57._col0=RS_219._col0(Left 
Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_57] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_178] (rows=7792 width=375) + Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=3914656 width=375) + Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_188] + PartitionCols:_col0 + Select Operator [SEL_187] (rows=1861800 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + TableScan [TS_6] (rows=1861800 width=375) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_173] (rows=3860070 width=5) + Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col2 + Select Operator [SEL_182] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col0 + Select Operator [SEL_185] (rows=2000000 width=102) + Output:["_col0"] + Filter Operator [FIL_184] (rows=2000000 width=102) + 
predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') + TableScan [TS_3] (rows=40000000 width=102) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_53] (rows=155827 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_17] (rows=57825495 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=57825495 width=2) + Conds:RS_209._col0=RS_191._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_189] (rows=201 width=12) + predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7) + TableScan [TS_11] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_207] (rows=525327388 width=7) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_54_c_c_customer_sk_min) AND DynamicValue(RS_54_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_54_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_8] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"] + Select Operator [SEL_133] (rows=3914656 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_174] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_192] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_219] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=155827 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_217] (rows=155827 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=155827 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=15843227 width=3) + Conds:RS_216._col0=RS_193._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_193] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_190] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_214] (rows=143930993 
width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_57_c_c_customer_sk_min) AND DynamicValue(RS_57_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_57_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_18] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_150] + Group By Operator [GBY_149] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_148] (rows=7792 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_178] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_194] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_229] + PartitionCols:_col0 + Select Operator 
[SEL_228] (rows=154725 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_227] (rows=154725 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_42] (rows=154725 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=31162251 width=3) + Conds:RS_226._col0=RS_195._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_195] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_190] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_226] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_224] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_32] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=201 width=4) + Output:["_col0"] + 
Please refer to the previous Select Operator [SEL_190] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_165] + Group By Operator [GBY_164] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_163] (rows=58 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out new file mode 100644 index 0000000000..c13a7e7291 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out @@ -0,0 +1,429 @@ +PREHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + 
,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + 
,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 24 <- Reducer 18 (BROADCAST_EDGE) +Map 25 <- Reducer 22 (BROADCAST_EDGE) +Map 26 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 23 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (ONE_TO_ONE_EDGE) +Reducer 13 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 24 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 15 <- Map 23 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 18 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 25 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 26 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_348] + Limit [LIM_347] (rows=100 width=85) + Number of rows:100 + Select Operator [SEL_346] (rows=12248093 width=85) + Output:["_col0"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_90] + Select Operator [SEL_89] (rows=12248093 width=85) + Output:["_col0"] + Filter Operator [FIL_88] (rows=12248093 width=433) + predicate:CASE WHEN (_col5) THEN (CASE WHEN (_col6) THEN (((_col1 / _col4) > _col7)) ELSE (_col8) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col4) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_282] (rows=24496187 width=433) + Conds:RS_317._col0=RS_86._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 12 
[ONE_TO_ONE_EDGE] + FORWARD [RS_86] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=20485012 width=421) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_281] (rows=20485012 width=529) + Conds:RS_81._col0=RS_325._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] + PartitionCols:_col0 + Select Operator [SEL_324] (rows=80000000 width=297) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_323] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_74] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_279] (rows=187573258 width=764) + Conds:RS_70._col1=RS_312._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=80000000 width=656) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + TableScan [TS_6] (rows=80000000 width=656) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_278] (rows=187573258 width=115) + Conds:RS_322._col0=RS_291._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + PartitionCols:_col0 + Select 
Operator [SEL_286] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_283] (rows=652 width=8) + predicate:(d_year = 2002) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Select Operator [SEL_321] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_320] (rows=525327388 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_59] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] + Group By Operator [GBY_298] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_292] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_286] + <-Reducer 17 [ONE_TO_ONE_EDGE] + FORWARD [RS_81] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_280] (rows=17130654 width=332) + Conds:RS_335._col0=RS_345._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_335] + PartitionCols:_col0 + Select Operator [SEL_334] (rows=26666666 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_333] 
(rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_332] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_331] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_34] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=764) + Conds:RS_30._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_313] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=115) + Conds:RS_330._col0=RS_293._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_284] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_3] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] + PartitionCols:_col0 + Select Operator [SEL_329] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_328] (rows=525327388 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan 
[TS_19] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] + Group By Operator [GBY_299] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_294] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_345] + PartitionCols:_col0 + Select Operator [SEL_344] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_343] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_342] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_341] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_54] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=764) + Conds:RS_50._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col0 + Please refer to the previous Select Operator 
[SEL_310] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115) + Conds:RS_340._col0=RS_295._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_285] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_3] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] + PartitionCols:_col0 + Select Operator [SEL_339] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_338] (rows=143930993 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_39] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_337] + Group By Operator [GBY_336] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_296] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_288] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_317] + PartitionCols:_col0 + Select Operator [SEL_316] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_315] (rows=51391963 
width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_15] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_273] (rows=51391963 width=764) + Conds:RS_11._col1=RS_311._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_272] (rows=51391963 width=115) + Conds:RS_309._col0=RS_289._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_286] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0 + Select Operator [SEL_308] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_307] (rows=143930993 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + Group By Operator [GBY_297] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_290] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_286] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out new file mode 100644 index 0000000000..741bd90666 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out @@ -0,0 +1,169 @@ +PREHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and 
ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_86] + Limit [LIM_85] (rows=100 width=802) + Number of rows:100 + Select Operator [SEL_84] (rows=138600 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] + Select Operator [SEL_82] (rows=138600 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_81] (rows=138600 width=689) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + Select Operator [SEL_80] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + PartitionCols:_col1 + Group By Operator [GBY_78] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, 
_col4 + Group By Operator [GBY_16] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_58] (rows=4798568 width=689) + Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_69] + PartitionCols:_col0 + Select Operator [SEL_68] (rows=138600 width=581) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_67] (rows=138600 width=581) + predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_57] (rows=15995224 width=115) + Conds:RS_77._col0=RS_61._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_59] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_75] (rows=143966864 width=119) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and 
in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_74] + Group By Operator [GBY_73] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] + Group By Operator [GBY_71] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_70] (rows=138600 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_68] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_66] + Group By Operator [GBY_65] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_64] + Group By Operator [GBY_63] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_62] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_60] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out new file mode 100644 index 0000000000..d06a4ce1a0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out @@ -0,0 +1,257 @@ +PREHOOK: query: explain +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + 
,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr 
Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 10 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_136] + Select Operator [SEL_135] (rows=1 width=344) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_134] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"] + Select Operator [SEL_30] (rows=32631 width=24) + Output:["_col3","_col4","_col5"] + Filter Operator [FIL_29] (rows=32631 width=24) + predicate:((_col10 and _col6) or (_col11 and _col7) or (_col12 and _col8)) + Merge Join Operator [MERGEJOIN_98] (rows=43511 width=24) + Conds:RS_26._col2=RS_125._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11","_col12"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Select Operator [SEL_124] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_123] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN 
('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) + TableScan [TS_20] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_97] (rows=493115 width=42) + Conds:RS_101._col0=RS_24._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_99] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=1381041 width=174) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_18] (rows=1381041 width=218) + predicate:((_col17 and _col18 and _col13 and _col1) or (_col19 and _col20 and _col14 and _col2) or (_col21 and _col22 and _col15 and _col2)) + Merge Join Operator [MERGEJOIN_96] (rows=7365565 width=218) + Conds:RS_15._col4=RS_115._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col17","_col18","_col19","_col20","_col21","_col22"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + PartitionCols:_col0 + Select Operator [SEL_114] (rows=265971 width=28) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_113] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) + TableScan [TS_9] (rows=1861800 width=183) + 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_95] (rows=9243020 width=212) + Conds:RS_107._col0=RS_133._col2(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=1309 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_105] (rows=1309 width=8) + predicate:(hd_dep_count) IN (3, 1) + TableScan [TS_3] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + PartitionCols:_col2 + Select Operator [SEL_132] (rows=50840141 width=257) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_131] (rows=50840141 width=450) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_27_customer_address_ca_address_sk_min) AND DynamicValue(RS_27_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_27_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_12_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_12_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_12_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 
50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=450) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_116] (rows=265971 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3529412)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + Group By Operator [GBY_127] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3529412)"] + Select Operator [SEL_126] (rows=3529412 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_124] + <-Reducer 
5 [BROADCAST_EDGE] vectorized + BROADCAST [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_102] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_100] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=1309 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out new file mode 100644 index 0000000000..e8a6eaa464 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -0,0 +1,1400 @@ +Warning: Shuffle Join MERGEJOIN[1431][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1433][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1456][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross 
product +Warning: Shuffle Join MERGEJOIN[1435][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1469][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +PREHOOK: query: explain +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + 
where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + 
,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by 
i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 105 (BROADCAST_EDGE) +Map 107 <- Reducer 102 (BROADCAST_EDGE) +Map 108 <- Reducer 104 (BROADCAST_EDGE) +Map 109 <- Reducer 63 (BROADCAST_EDGE), Reducer 81 (BROADCAST_EDGE) +Map 110 <- Reducer 68 (BROADCAST_EDGE), Reducer 90 (BROADCAST_EDGE) +Map 20 <- Reducer 25 (BROADCAST_EDGE) +Map 36 <- Reducer 41 (BROADCAST_EDGE) +Map 46 <- Reducer 106 (BROADCAST_EDGE) +Map 50 <- Reducer 29 (BROADCAST_EDGE) +Map 51 <- Reducer 43 (BROADCAST_EDGE) +Map 52 <- Reducer 58 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) +Map 97 <- Reducer 100 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 11 (CONTAINS) +Reducer 100 <- Map 99 (CUSTOM_SIMPLE_EDGE) +Reducer 101 <- Map 107 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE) +Reducer 102 <- Map 99 (CUSTOM_SIMPLE_EDGE) +Reducer 103 <- Map 108 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE) +Reducer 104 <- Map 99 (CUSTOM_SIMPLE_EDGE) +Reducer 105 <- Map 99 (CUSTOM_SIMPLE_EDGE) +Reducer 106 <- Map 99 (CUSTOM_SIMPLE_EDGE) 
+Reducer 12 <- Union 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 62 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 17 <- Union 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 67 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 11 (CONTAINS) +Reducer 23 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 24 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 28 <- Union 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 24 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Union 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 24 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 35 <- Union 34 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 38 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 11 (CONTAINS) +Reducer 39 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) +Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 45 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 48 <- Map 
46 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 49 <- Map 46 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 5 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 54 <- Map 69 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 55 <- Reducer 54 (ONE_TO_ONE_EDGE), Reducer 70 (SIMPLE_EDGE) +Reducer 56 <- Reducer 55 (SIMPLE_EDGE) +Reducer 58 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 59 <- Map 109 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 60 <- Map 69 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE) +Reducer 61 <- Reducer 60 (ONE_TO_ONE_EDGE), Reducer 80 (SIMPLE_EDGE) +Reducer 62 <- Reducer 61 (SIMPLE_EDGE) +Reducer 63 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 110 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 65 <- Map 69 (SIMPLE_EDGE), Reducer 64 (SIMPLE_EDGE) +Reducer 66 <- Reducer 65 (ONE_TO_ONE_EDGE), Reducer 89 (SIMPLE_EDGE) +Reducer 67 <- Reducer 66 (SIMPLE_EDGE) +Reducer 68 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 70 <- Map 69 (SIMPLE_EDGE), Reducer 75 (ONE_TO_ONE_EDGE) +Reducer 71 <- Reducer 70 (CUSTOM_SIMPLE_EDGE) +Reducer 72 <- Map 69 (SIMPLE_EDGE), Reducer 98 (SIMPLE_EDGE) +Reducer 73 <- Reducer 72 (SIMPLE_EDGE), Union 74 (CONTAINS) +Reducer 75 <- Union 74 (SIMPLE_EDGE) +Reducer 76 <- Map 69 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) +Reducer 77 <- Reducer 76 (SIMPLE_EDGE), Union 74 (CONTAINS) +Reducer 78 <- Map 69 (SIMPLE_EDGE), Reducer 103 (SIMPLE_EDGE) +Reducer 79 <- Reducer 78 (SIMPLE_EDGE), Union 74 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 80 <- Map 69 (SIMPLE_EDGE), Reducer 85 (ONE_TO_ONE_EDGE) +Reducer 81 <- Reducer 80 (CUSTOM_SIMPLE_EDGE) +Reducer 82 <- Map 69 (SIMPLE_EDGE), Reducer 98 (SIMPLE_EDGE) +Reducer 83 <- Reducer 82 (SIMPLE_EDGE), Union 84 (CONTAINS) +Reducer 85 <- Union 84 (SIMPLE_EDGE) +Reducer 86 <- Reducer 82 
(SIMPLE_EDGE), Union 87 (CONTAINS) +Reducer 88 <- Union 87 (SIMPLE_EDGE) +Reducer 89 <- Map 69 (SIMPLE_EDGE), Reducer 88 (ONE_TO_ONE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 90 <- Reducer 89 (CUSTOM_SIMPLE_EDGE) +Reducer 91 <- Map 69 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) +Reducer 92 <- Reducer 91 (SIMPLE_EDGE), Union 84 (CONTAINS) +Reducer 93 <- Reducer 91 (SIMPLE_EDGE), Union 87 (CONTAINS) +Reducer 94 <- Map 69 (SIMPLE_EDGE), Reducer 103 (SIMPLE_EDGE) +Reducer 95 <- Reducer 94 (SIMPLE_EDGE), Union 84 (CONTAINS) +Reducer 96 <- Reducer 94 (SIMPLE_EDGE), Union 87 (CONTAINS) +Reducer 98 <- Map 97 (SIMPLE_EDGE), Map 99 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_1703] + Limit [LIM_1702] (rows=7 width=192) + Number of rows:100 + Select Operator [SEL_1701] (rows=7 width=192) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1700] + Select Operator [SEL_1699] (rows=7 width=192) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_1698] (rows=7 width=200) + Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] + Reduce Output Operator [RS_1462] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_1461] (rows=7 width=200) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Top N Key Operator [TNK_1460] (rows=3 width=221) + keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 + Select Operator [SEL_1458] (rows=1 width=223) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1457] (rows=1 width=244) + predicate:(_col5 > _col1) + Merge Join Operator [MERGEJOIN_1456] (rows=1 width=244) 
+ Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_371] + Merge Join Operator [MERGEJOIN_1433] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1709] + Select Operator [SEL_1708] (rows=1 width=8) + Filter Operator [FIL_1707] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1706] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1705] (rows=1 width=8) + Group By Operator [GBY_1704] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 11 [CUSTOM_SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] + Reduce Output Operator [RS_1455] + Group By Operator [GBY_1454] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1453] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1451] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1450] (rows=14736682 width=0) + Conds:RS_1633._col0=RS_1614._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1614] + PartitionCols:_col0 + Select Operator [SEL_1603] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_1602] (rows=1957 width=8) + predicate:d_year BETWEEN 1999 AND 2001 + TableScan [TS_96] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1633] + PartitionCols:_col0 + Select Operator [SEL_1631] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1630] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 
width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity"] + <-Reducer 105 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1629] + Group By Operator [GBY_1628] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 99 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1626] + Group By Operator [GBY_1621] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1611] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1603] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_1487] + Group By Operator [GBY_1486] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1485] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1483] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1482] (rows=7676736 width=3) + Conds:RS_1767._col0=RS_1754._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1754] + PartitionCols:_col0 + Select Operator [SEL_1749] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_1748] (rows=1957 width=8) + predicate:d_year BETWEEN 1998 AND 2000 + TableScan [TS_13] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1767] + PartitionCols:_col0 + Select Operator [SEL_1765] (rows=286549727 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1764] (rows=286549727 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_10] 
(rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1763] + Group By Operator [GBY_1762] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1760] + Group By Operator [GBY_1758] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1751] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1749] + <-Reducer 38 [CONTAINS] + Reduce Output Operator [RS_1523] + Group By Operator [GBY_1522] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1521] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1519] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1518] (rows=3856907 width=3) + Conds:RS_1795._col0=RS_1782._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1782] + PartitionCols:_col0 + Select Operator [SEL_1777] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_1776] (rows=1957 width=8) + predicate:d_year BETWEEN 1998 AND 2000 + TableScan [TS_24] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1795] + PartitionCols:_col0 + Select Operator [SEL_1793] (rows=143966864 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1792] (rows=143966864 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + 
TableScan [TS_21] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity"] + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1791] + Group By Operator [GBY_1790] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1788] + Group By Operator [GBY_1786] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1779] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1777] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1712] + Select Operator [SEL_1711] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1710] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 31 [CUSTOM_SIMPLE_EDGE] + <-Reducer 30 [CONTAINS] + Reduce Output Operator [RS_1505] + Group By Operator [GBY_1504] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1503] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1501] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1500] (rows=7676736 width=94) + Conds:RS_1774._col0=RS_1755._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1755] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1749] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1774] + PartitionCols:_col0 + Select Operator [SEL_1772] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1771] (rows=286549727 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_62_date_dim_d_date_sk_min) AND 
DynamicValue(RS_62_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_62_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_55] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1770] + Group By Operator [GBY_1769] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1761] + Group By Operator [GBY_1759] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1753] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1749] + <-Reducer 44 [CONTAINS] + Reduce Output Operator [RS_1541] + Group By Operator [GBY_1540] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1539] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1537] (rows=3856907 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1536] (rows=3856907 width=114) + Conds:RS_1802._col0=RS_1783._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1783] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1777] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1802] + PartitionCols:_col0 + Select Operator [SEL_1800] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1799] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) 
and ws_sold_date_sk is not null) + TableScan [TS_66] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1798] + Group By Operator [GBY_1797] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1789] + Group By Operator [GBY_1787] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1781] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1777] + <-Reducer 48 [CONTAINS] + Reduce Output Operator [RS_1559] + Group By Operator [GBY_1558] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1557] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1555] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1554] (rows=14736682 width=0) + Conds:RS_1809._col0=RS_1615._col0(Inner),Output:["_col1","_col2"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1615] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1809] + PartitionCols:_col0 + Select Operator [SEL_1807] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1806] (rows=550076554 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_45] (rows=575995635 width=114) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] + <-Reducer 106 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1805] + Group By Operator [GBY_1804] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 99 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1627] + Group By Operator [GBY_1622] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1613] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1603] + <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1725] + Group By Operator [GBY_1724] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 61 [SIMPLE_EDGE] + SHUFFLE [RS_365] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_364] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 + Select Operator [SEL_362] (rows=1 width=128) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1429] (rows=1 width=128) + Conds:RS_359._col1=RS_360._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col8"] + <-Reducer 80 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_360] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1412] (rows=724 width=4) + Conds:RS_1682._col1, _col2, _col3=RS_1718._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1682] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_1671] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1662] (rows=458612 width=15) + predicate:(i_brand_id is not null 
and i_category_id is not null and i_class_id is not null) + TableScan [TS_90] (rows=462000 width=15) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] + <-Reducer 85 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1718] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1717] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1716] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1715] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 84 [SIMPLE_EDGE] + <-Reducer 83 [CONTAINS] vectorized + Reduce Output Operator [RS_1837] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1836] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1835] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 82 [SIMPLE_EDGE] + SHUFFLE [RS_298] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_297] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1407] (rows=14628613 width=11) + Conds:RS_293._col1=RS_1683._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1683] + PartitionCols:_col0 + Select Operator [SEL_1672] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1663] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_90] + <-Reducer 98 [SIMPLE_EDGE] + SHUFFLE [RS_293] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1391] (rows=14736682 width=4) + 
Conds:RS_1815._col0=RS_1604._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1604] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 97 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1815] + PartitionCols:_col0 + Select Operator [SEL_1814] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1813] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_103_d1_d_date_sk_min) AND DynamicValue(RS_103_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_103_d1_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_93] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] + <-Reducer 100 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1812] + Group By Operator [GBY_1811] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 99 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1623] + Group By Operator [GBY_1618] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1605] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1603] + <-Reducer 92 [CONTAINS] vectorized + Reduce Output Operator [RS_1843] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1842] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1841] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 91 [SIMPLE_EDGE] + SHUFFLE [RS_318] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_317] (rows=121728 width=19) + 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1409] (rows=7620440 width=11) + Conds:RS_313._col1=RS_1684._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1684] + PartitionCols:_col0 + Select Operator [SEL_1673] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1664] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_90] + <-Reducer 101 [SIMPLE_EDGE] + SHUFFLE [RS_313] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1393] (rows=7676736 width=4) + Conds:RS_1823._col0=RS_1606._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1606] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 107 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1823] + PartitionCols:_col0 + Select Operator [SEL_1822] (rows=286549727 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1821] (rows=286549727 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_123_d2_d_date_sk_min) AND DynamicValue(RS_123_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_123_d2_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_113] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] + <-Reducer 102 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1820] + Group By Operator [GBY_1819] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 99 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1624] + Group By Operator [GBY_1619] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_1607] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1603] + <-Reducer 95 [CONTAINS] vectorized + Reduce Output Operator [RS_1849] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1848] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1847] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 94 [SIMPLE_EDGE] + SHUFFLE [RS_339] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_338] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1411] (rows=3828623 width=11) + Conds:RS_334._col1=RS_1685._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1685] + PartitionCols:_col0 + Select Operator [SEL_1674] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1665] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_90] + <-Reducer 103 [SIMPLE_EDGE] + SHUFFLE [RS_334] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1395] (rows=3856907 width=4) + Conds:RS_1831._col0=RS_1608._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1608] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 108 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1831] + PartitionCols:_col0 + Select Operator [SEL_1830] (rows=143966864 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1829] (rows=143966864 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_144_d3_d_date_sk_min) AND DynamicValue(RS_144_d3_d_date_sk_max) and 
in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_144_d3_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_134] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] + <-Reducer 104 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1828] + Group By Operator [GBY_1827] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 99 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1625] + Group By Operator [GBY_1620] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1609] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1603] + <-Reducer 60 [ONE_TO_ONE_EDGE] + FORWARD [RS_359] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1405] (rows=7790806 width=110) + Conds:RS_356._col1=RS_1675._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1675] + PartitionCols:_col0 + Select Operator [SEL_1666] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Please refer to the previous TableScan [TS_90] + <-Reducer 59 [SIMPLE_EDGE] + SHUFFLE [RS_356] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1404] (rows=7790806 width=98) + Conds:RS_1723._col0=RS_1648._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1648] + PartitionCols:_col0 + Select Operator [SEL_1645] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_1644] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 2000)) + TableScan [TS_85] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 109 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1723] + 
PartitionCols:_col0 + Select Operator [SEL_1722] (rows=286549727 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1721] (rows=286549727 width=123) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_360_item_i_item_sk_min) AND DynamicValue(RS_360_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_360_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_354_date_dim_d_date_sk_min) AND DynamicValue(RS_354_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_354_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_270] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1714] + Group By Operator [GBY_1713] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1656] + Group By Operator [GBY_1653] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1649] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1645] + <-Reducer 81 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1720] + Group By Operator [GBY_1719] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 80 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_1077] + Group By Operator [GBY_1076] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1075] (rows=724 width=4) + Output:["_col0"] + 
Please refer to the previous Merge Join Operator [MERGEJOIN_1412] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_1475] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_1474] (rows=7 width=200) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Top N Key Operator [TNK_1473] (rows=3 width=221) + keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 + Select Operator [SEL_1471] (rows=1 width=219) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1470] (rows=1 width=244) + predicate:(_col5 > _col1) + Merge Join Operator [MERGEJOIN_1469] (rows=1 width=244) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_560] + Merge Join Operator [MERGEJOIN_1435] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1731] + Select Operator [SEL_1730] (rows=1 width=8) + Filter Operator [FIL_1729] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1728] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1727] (rows=1 width=8) + Group By Operator [GBY_1726] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 16 [CUSTOM_SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] + Reduce Output Operator [RS_1468] + Group By Operator [GBY_1467] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1466] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1464] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1463] (rows=14736682 width=0) + Conds:RS_1634._col0=RS_1616._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1616] + PartitionCols:_col0 + Please refer to the previous Select 
Operator [SEL_1603] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1634] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1631] + <-Reducer 23 [CONTAINS] + Reduce Output Operator [RS_1493] + Group By Operator [GBY_1492] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1491] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1489] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1488] (rows=7676736 width=3) + Conds:RS_1768._col0=RS_1756._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1756] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1749] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1768] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1765] + <-Reducer 39 [CONTAINS] + Reduce Output Operator [RS_1529] + Group By Operator [GBY_1528] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1527] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1525] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1524] (rows=3856907 width=3) + Conds:RS_1796._col0=RS_1784._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1784] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1777] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1796] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1793] + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1734] + Select Operator [SEL_1733] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1732] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 34 [CUSTOM_SIMPLE_EDGE] + <-Reducer 33 [CONTAINS] + Reduce Output Operator [RS_1511] + Group By Operator [GBY_1510] (rows=1 width=120) + 
Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1509] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1507] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1506] (rows=7676736 width=94) + Conds:RS_1775._col0=RS_1757._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1757] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1749] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1775] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1772] + <-Reducer 45 [CONTAINS] + Reduce Output Operator [RS_1547] + Group By Operator [GBY_1546] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1545] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1543] (rows=3856907 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1542] (rows=3856907 width=114) + Conds:RS_1803._col0=RS_1785._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1785] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1777] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1803] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1800] + <-Reducer 49 [CONTAINS] + Reduce Output Operator [RS_1565] + Group By Operator [GBY_1564] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1563] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1561] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1560] (rows=14736682 width=0) + Conds:RS_1810._col0=RS_1617._col0(Inner),Output:["_col1","_col2"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1617] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 46 [SIMPLE_EDGE] vectorized 
+ SHUFFLE [RS_1810] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1807] + <-Reducer 67 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1747] + Group By Operator [GBY_1746] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 66 [SIMPLE_EDGE] + SHUFFLE [RS_554] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_553] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 + Select Operator [SEL_551] (rows=1 width=128) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1430] (rows=1 width=128) + Conds:RS_548._col1=RS_549._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col8"] + <-Reducer 89 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_549] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1427] (rows=724 width=4) + Conds:RS_1686._col1, _col2, _col3=RS_1740._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1686] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_1676] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1667] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_90] + <-Reducer 88 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1740] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1739] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1738] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1737] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 87 [SIMPLE_EDGE] + <-Reducer 86 [CONTAINS] vectorized + Reduce Output Operator [RS_1840] + 
PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1839] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1838] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 82 [SIMPLE_EDGE] + SHUFFLE [RS_487] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_297] + <-Reducer 93 [CONTAINS] vectorized + Reduce Output Operator [RS_1846] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1845] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1844] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 91 [SIMPLE_EDGE] + SHUFFLE [RS_507] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_317] + <-Reducer 96 [CONTAINS] vectorized + Reduce Output Operator [RS_1852] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1851] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1850] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 94 [SIMPLE_EDGE] + SHUFFLE [RS_528] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_338] + <-Reducer 65 [ONE_TO_ONE_EDGE] + FORWARD [RS_548] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1420] (rows=3942084 width=130) + Conds:RS_545._col1=RS_1677._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1677] + PartitionCols:_col0 + Select Operator [SEL_1668] 
(rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Please refer to the previous TableScan [TS_90] + <-Reducer 64 [SIMPLE_EDGE] + SHUFFLE [RS_545] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1419] (rows=3942084 width=118) + Conds:RS_1745._col0=RS_1650._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1650] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1645] + <-Map 110 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1745] + PartitionCols:_col0 + Select Operator [SEL_1744] (rows=143966864 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1743] (rows=143966864 width=123) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_549_item_i_item_sk_min) AND DynamicValue(RS_549_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_549_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_543_date_dim_d_date_sk_min) AND DynamicValue(RS_543_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_543_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_459] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] + <-Reducer 68 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1736] + Group By Operator [GBY_1735] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1657] + Group By Operator [GBY_1654] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1651] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1645] + <-Reducer 90 [BROADCAST_EDGE] 
vectorized + BROADCAST [RS_1742] + Group By Operator [GBY_1741] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 89 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_1266] + Group By Operator [GBY_1265] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1264] (rows=724 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_1427] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_1449] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_1448] (rows=7 width=200) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Top N Key Operator [TNK_1447] (rows=3 width=221) + keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 + Select Operator [SEL_1445] (rows=1 width=221) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1444] (rows=1 width=244) + predicate:(_col5 > _col1) + Merge Join Operator [MERGEJOIN_1443] (rows=1 width=244) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_183] + Merge Join Operator [MERGEJOIN_1431] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1643] + Select Operator [SEL_1642] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1641] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 27 [CUSTOM_SIMPLE_EDGE] + <-Reducer 26 [CONTAINS] + Reduce Output Operator [RS_1499] + Group By Operator [GBY_1498] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select 
Operator [SEL_1497] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1495] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1494] (rows=7676736 width=94) + Conds:RS_1773._col0=RS_1752._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1752] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1749] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1773] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1772] + <-Reducer 42 [CONTAINS] + Reduce Output Operator [RS_1535] + Group By Operator [GBY_1534] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1533] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1531] (rows=3856907 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1530] (rows=3856907 width=114) + Conds:RS_1801._col0=RS_1780._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1780] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1777] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1801] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1800] + <-Reducer 47 [CONTAINS] + Reduce Output Operator [RS_1553] + Group By Operator [GBY_1552] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1551] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1549] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1548] (rows=14736682 width=0) + Conds:RS_1808._col0=RS_1612._col0(Inner),Output:["_col1","_col2"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1612] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1808] + PartitionCols:_col0 + Please refer to the previous 
Select Operator [SEL_1807] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1640] + Select Operator [SEL_1639] (rows=1 width=8) + Filter Operator [FIL_1638] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1637] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1636] (rows=1 width=8) + Group By Operator [GBY_1635] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 3 [CUSTOM_SIMPLE_EDGE] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_1442] + Group By Operator [GBY_1441] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1440] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1438] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1437] (rows=14736682 width=0) + Conds:RS_1632._col0=RS_1610._col0(Inner),Output:["_col1"] + <-Map 99 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1610] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1603] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1632] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1631] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_1481] + Group By Operator [GBY_1480] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1479] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1477] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1476] (rows=7676736 width=3) + Conds:RS_1766._col0=RS_1750._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1750] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1749] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1766] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1765] + <-Reducer 37 [CONTAINS] + Reduce Output Operator [RS_1517] + Group By Operator [GBY_1516] 
(rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1515] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1513] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1512] (rows=3856907 width=3) + Conds:RS_1794._col0=RS_1778._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1778] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1777] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1794] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1793] + <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1697] + Group By Operator [GBY_1696] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_177] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_176] (rows=1 width=132) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 + Select Operator [SEL_174] (rows=1 width=128) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1428] (rows=1 width=128) + Conds:RS_171._col1=RS_172._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col8"] + <-Reducer 70 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_172] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1397] (rows=724 width=4) + Conds:RS_1678._col1, _col2, _col3=RS_1690._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1678] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_1669] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1660] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_90] + <-Reducer 75 
[ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1690] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1689] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1688] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1687] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 74 [SIMPLE_EDGE] + <-Reducer 73 [CONTAINS] vectorized + Reduce Output Operator [RS_1818] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1817] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1816] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 72 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_109] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1392] (rows=14628613 width=11) + Conds:RS_105._col1=RS_1679._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1679] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1669] + <-Reducer 98 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1391] + <-Reducer 77 [CONTAINS] vectorized + Reduce Output Operator [RS_1826] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1825] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1824] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 76 [SIMPLE_EDGE] + SHUFFLE [RS_130] + 
PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_129] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1394] (rows=7620440 width=11) + Conds:RS_125._col1=RS_1680._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1680] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1669] + <-Reducer 101 [SIMPLE_EDGE] + SHUFFLE [RS_125] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1393] + <-Reducer 79 [CONTAINS] vectorized + Reduce Output Operator [RS_1834] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1833] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1832] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 78 [SIMPLE_EDGE] + SHUFFLE [RS_151] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_150] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1396] (rows=3828623 width=11) + Conds:RS_146._col1=RS_1681._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1681] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1669] + <-Reducer 103 [SIMPLE_EDGE] + SHUFFLE [RS_146] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1395] + <-Reducer 54 [ONE_TO_ONE_EDGE] + FORWARD [RS_171] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1390] (rows=15062131 width=15) + Conds:RS_168._col1=RS_1670._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1670] + PartitionCols:_col0 + Select Operator 
[SEL_1661] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Please refer to the previous TableScan [TS_90] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_168] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1389] (rows=15062131 width=4) + Conds:RS_1695._col0=RS_1646._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1646] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1645] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1695] + PartitionCols:_col0 + Select Operator [SEL_1694] (rows=550076554 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1693] (rows=550076554 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_172_item_i_item_sk_min) AND DynamicValue(RS_172_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_172_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_166_date_dim_d_date_sk_min) AND DynamicValue(RS_166_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_166_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_82] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] + <-Reducer 58 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1659] + Group By Operator [GBY_1658] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1655] + Group By Operator [GBY_1652] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1647] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1645] + <-Reducer 71 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_1692] + Group By Operator [GBY_1691] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 70 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_888] + Group By Operator [GBY_887] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_886] (rows=724 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_1397] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out new file mode 100644 index 0000000000..6c7611b297 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in 
('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 8 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_98] + Limit [LIM_97] (rows=100 width=201) + Number of rows:100 + Select Operator [SEL_96] (rows=717 width=201) + Output:["_col0","_col1"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + Group By Operator [GBY_94] (rows=717 width=201) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=11472 width=201) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 + Top N Key Operator [TNK_44] (rows=20154874 width=193) + keys:_col2,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_77] (rows=20154874 width=193) + Conds:RS_19._col0=RS_85._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_85] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_83] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000)) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE 
[RS_19] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=285117831 width=204) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_14] (rows=285117831 width=216) + predicate:(_col4 or _col5 or _col9) + Merge Join Operator [MERGEJOIN_76] (rows=285117831 width=216) + Conds:RS_11._col0=RS_93._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] + PartitionCols:_col1 + Select Operator [SEL_92] (rows=285117831 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_91] (rows=285117831 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_5] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_86] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_84] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_75] (rows=80000000 width=101) + Conds:RS_80._col1=RS_82._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_80] + PartitionCols:_col1 + Select Operator [SEL_79] (rows=80000000 width=8) 
+ Output:["_col0","_col1"] + Filter Operator [FIL_78] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_81] (rows=40000000 width=101) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_3] (rows=40000000 width=179) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out new file mode 100644 index 0000000000..3143be8480 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: 
default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_169] + Limit [LIM_168] (rows=1 width=240) + Number of rows:100 + Select Operator [SEL_167] (rows=1 width=240) + Output:["_col0","_col1","_col2"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + Select Operator [SEL_165] (rows=1 width=240) + Output:["_col1","_col2","_col3"] + Group By Operator [GBY_164] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_161] (rows=5150256 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=5150256 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 + Select Operator [SEL_37] (rows=5150256 width=218) + Output:["_col4","_col5","_col6"] + Filter Operator 
[FIL_36] (rows=5150256 width=218) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_125] (rows=13282454 width=218) + Conds:RS_33._col4=RS_160._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_160] + PartitionCols:_col0 + Select Operator [SEL_159] (rows=28798881 width=8) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=28798881 width=4) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_33] + PartitionCols:_col4 + Select Operator [SEL_32] (rows=5150256 width=200) + Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=202) + Conds:RS_29._col4=RS_158._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_123] (rows=5150256 width=200) + Conds:RS_18._col2=RS_144._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=10 width=102) + Output:["_col0"] + Filter Operator [FIL_142] (rows=10 width=102) + predicate:(cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') + TableScan [TS_9] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_122] (rows=30901534 width=230) + Conds:RS_15._col1=RS_136._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_134] (rows=784314 width=90) + predicate:(ca_state = 'NY') + TableScan [TS_6] (rows=40000000 
width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=31519516 width=234) + Conds:RS_152._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_126] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=283695062 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_150] (rows=283695062 width=243) + predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=243) + 
default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_129] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_127] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_137] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_135] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_145] (rows=10 width=4) + Output:["_col0"] + Please refer to the previous Select 
Operator [SEL_143] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + PartitionCols:_col0 + Group By Operator [GBY_157] (rows=286548719 width=7) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_156] (rows=286548719 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=286548719 width=7) + predicate:((cs_order_number BETWEEN DynamicValue(RS_29_cs1_cs_order_number_min) AND DynamicValue(RS_29_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_29_cs1_cs_order_number_bloom_filter))) and cs_warehouse_sk is not null) + TableScan [TS_22] (rows=287989836 width=7) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=5150256 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_123] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out new file mode 100644 index 0000000000..4b69e67ca2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -0,0 +1,271 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) 
as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + 
,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 5 <- Reducer 15 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_251] + Limit [LIM_250] (rows=100 width=466) + Number of rows:100 + Select Operator [SEL_249] (rows=13317347 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] + Select Operator [SEL_247] (rows=13317347 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_246] (rows=13317347 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_48] (rows=13317347 width=466) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 + Top N Key Operator [TNK_99] (rows=13317347 width=377) + keys:_col0, _col1, _col2,sort order:+++,top n:100 + Select Operator [SEL_46] (rows=13317347 width=377) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_213] (rows=13317347 width=377) + Conds:RS_215._col0=RS_44._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_215] + PartitionCols:_col0 + Select Operator [SEL_214] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=13317347 width=97) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_212] (rows=13317347 width=97) + Conds:RS_39._col1=RS_245._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col8"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col0 + Select Operator [SEL_244] (rows=1704 width=90) + Output:["_col0","_col1"] + TableScan [TS_34] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_211] (rows=13317347 width=11) + Conds:RS_36._col4=RS_225._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_221] 
(rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_218] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + TableScan [TS_18] (rows=73049 width=94) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col4 + Select Operator [SEL_30] (rows=266379760 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_210] (rows=266379760 width=23) + Conds:RS_27._col4=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_217] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + Please refer to the previous TableScan [TS_18] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_209] (rows=5328251731 width=27) + Conds:RS_24._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=101 width=4) + Output:["_col0"] + Filter Operator [FIL_216] (rows=101 width=94) + predicate:(d_quarter_name = '2000Q1') + Please refer to the previous TableScan [TS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=96331893945 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_208] (rows=96331893945 width=31) + Conds:RS_14._col7, _col8=RS_243._col2, _col1(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col2, _col1 + Select Operator [SEL_242] (rows=285117831 width=15) + 
Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_241] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_37_d3_d_date_sk_min) AND DynamicValue(RS_37_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_37_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_8] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + Group By Operator [GBY_228] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_226] (rows=3652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_221] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col7, _col8 + Merge Join Operator [MERGEJOIN_207] (rows=478292911 width=31) + Conds:RS_235._col1, _col2, _col4=RS_238._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_237] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_236] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_5] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Map 5 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_235] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_234] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_233] (rows=501694138 width=23) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_232] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_223] (rows=101 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_219] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out new file mode 100644 index 0000000000..360e768994 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out @@ -0,0 +1,240 @@ +Warning: Shuffle Join MERGEJOIN[139][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as 
numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = 
cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 10 <- Reducer 14 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (CUSTOM_SIMPLE_EDGE), Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_179] + Limit [LIM_178] (rows=100 width=1165) + Number of rows:100 + Select Operator [SEL_177] (rows=14343773970 width=1165) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] + Select Operator [SEL_175] (rows=14343773970 width=1165) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Group By Operator [GBY_174] (rows=14343773970 width=1229) + 
Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_40] (rows=52751489185150 width=1229) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)","sum(_col10)","count(_col10)","sum(_col11)","count(_col11)"],keys:_col2, _col3, _col4, _col14, 0L + Merge Join Operator [MERGEJOIN_142] (rows=10550297837030 width=1164) + Conds:RS_36._col1=RS_173._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col14"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_173] + PartitionCols:_col0 + Select Operator [SEL_172] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_31] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_141] (rows=10550297837030 width=1068) + Conds:RS_33._col0=RS_161._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=652 width=4) + 
Output:["_col0"] + Filter Operator [FIL_159] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_28] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=29758070991000 width=1072) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_140] (rows=29758070991000 width=1072) + Conds:RS_24._col0, _col1=RS_25._col1, _col3(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_24] + PartitionCols:_col0, _col1 + Merge Join Operator [MERGEJOIN_137] (rows=4890586 width=371) + Conds:RS_145._col2=RS_148._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + PartitionCols:_col2 + Select Operator [SEL_144] (rows=35631408 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_143] (rows=35631408 width=19) + predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=19) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=5490196 width=285) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_146] (rows=5490196 width=285) + predicate:(ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') + TableScan [TS_3] (rows=40000000 width=285) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE 
[RS_25] + PartitionCols:_col1, _col3 + Select Operator [SEL_20] (rows=29758070991000 width=687) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_139] (rows=29758070991000 width=687) + Conds:(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_171] + Select Operator [SEL_170] (rows=1861800 width=4) + Output:["_col0"] + TableScan [TS_12] (rows=1861800 width=4) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_17] + Merge Join Operator [MERGEJOIN_138] (rows=15983495 width=643) + Conds:RS_169._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=103433 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=103433 width=187) + predicate:((cd_education_status = 'College') and (cd_gender = 'M')) + TableScan [TS_9] (rows=1861800 width=187) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_169] + PartitionCols:_col2 + Select Operator [SEL_168] (rows=283692098 width=573) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_167] (rows=283692098 width=466) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_15_cd1_cd_demo_sk_min) AND DynamicValue(RS_15_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_15_cd1_cd_demo_sk_bloom_filter))) and (cs_bill_customer_sk BETWEEN DynamicValue(RS_24_customer_c_customer_sk_min) AND DynamicValue(RS_24_customer_c_customer_sk_max) and 
in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_24_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=466) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_152] (rows=103433 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_150] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_162] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_160] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator 
[GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4890586)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4890586)"] + Select Operator [SEL_102] (rows=4890586 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_137] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out new file mode 100644 index 0000000000..eeba1e4f85 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk 
+ and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 8 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_154] + Limit [LIM_153] (rows=100 width=419) + Number of rows:100 + Select Operator [SEL_152] (rows=2098703 width=418) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + Select Operator [SEL_150] (rows=2098703 width=418) + Output:["_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_149] (rows=2098703 width=314) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_35] 
(rows=2098703 width=314) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)"],keys:_col3, _col2, _col4, _col5 + Select Operator [SEL_34] (rows=2098703 width=570) + Output:["_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_33] (rows=2098703 width=570) + predicate:(_col6 <> _col8) + Merge Join Operator [MERGEJOIN_122] (rows=2098703 width=570) + Conds:RS_30._col0=RS_148._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=1704 width=188) + Output:["_col0","_col1"] + TableScan [TS_28] (rows=1704 width=93) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=2098703 width=386) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_121] (rows=2098703 width=386) + Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=2098703 width=202) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_120] (rows=2098703 width=202) + Conds:RS_17._col1=RS_138._col0(Inner),Output:["_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=7333 width=206) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_136] (rows=7333 width=210) + predicate:(i_manager_id = 7) + TableScan [TS_11] (rows=462000 width=210) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + 
Merge Join Operator [MERGEJOIN_119] (rows=13737330 width=4) + Conds:RS_146._col0=RS_130._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] + PartitionCols:_col0 + Select Operator [SEL_129] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_128] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 1999)) + TableScan [TS_8] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + PartitionCols:_col0 + Select Operator [SEL_145] (rows=501694138 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_144] (rows=501694138 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_18_item_i_item_sk_min) AND DynamicValue(RS_18_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_131] (rows=50 width=4) + Output:["_col0"] + 
Please refer to the previous Select Operator [SEL_129] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_139] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_137] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=188) + Conds:RS_125._col1=RS_127._col0(Inner),Output:["_col0","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col1 + Select Operator [SEL_124] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_123] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col0 + Select Operator [SEL_126] (rows=40000000 width=188) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=40000000 width=93) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out new file mode 100644 index 0000000000..baa714bc95 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out @@ -0,0 +1,228 @@ +PREHOOK: query: explain +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk 
sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain 
+with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: 
default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Union 2 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_173] + Select Operator [SEL_172] (rows=12881 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_57] + Select Operator [SEL_56] (rows=12881 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_146] (rows=12881 width=1572) + Conds:RS_53._col0=RS_54._col7(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_53] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_143] (rows=652 width=788) + Conds:RS_164._col0=RS_170._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + PartitionCols:_col0 + Select Operator [SEL_168] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_166] (rows=652 width=8) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_20] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_164] + PartitionCols:_col0 + Group By Operator [GBY_163] (rows=13152 width=788) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=3182784 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 + Select Operator [SEL_14] (rows=430516591 width=143) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_142] (rows=430516591 width=143) + Conds:Union 2._col0=RS_162._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] + PartitionCols:_col0 + Select Operator [SEL_161] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_160] (rows=73049 width=99) + predicate:d_week_seq is not null + TableScan [TS_8] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + Reduce Output Operator [RS_159] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=143966864 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=143966864 width=115) + predicate:ws_sold_date_sk is not null + TableScan [TS_147] (rows=144002668 width=115) + Output:["ws_sold_date_sk","ws_ext_sales_price"] + <-Map 9 [CONTAINS] vectorized + Reduce Output Operator [RS_176] + PartitionCols:_col0 + Select Operator [SEL_175] (rows=286549727 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_174] (rows=286549727 width=115) + predicate:cs_sold_date_sk is not null + TableScan [TS_152] (rows=287989836 
width=115) + Output:["cs_sold_date_sk","cs_ext_sales_price"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col7 + Select Operator [SEL_49] (rows=652 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) + Conds:RS_165._col0=RS_171._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_171] + PartitionCols:_col0 + Select Operator [SEL_169] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_167] (rows=652 width=8) + predicate:((d_year = 2002) and d_week_seq is not null) + Please refer to the previous TableScan [TS_20] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_165] + PartitionCols:_col0 + Please refer to the previous Group By Operator [GBY_163] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out new file mode 100644 index 0000000000..da3e262352 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH 
### +POSTHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_86] + Limit [LIM_85] (rows=100 width=802) + Number of rows:100 + Select Operator [SEL_84] (rows=138600 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] + Select Operator [SEL_82] (rows=138600 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_81] (rows=138600 width=689) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + Select Operator [SEL_80] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + 
<-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + PartitionCols:_col1 + Group By Operator [GBY_78] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_16] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_58] (rows=9551005 width=673) + Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_69] + PartitionCols:_col0 + Select Operator [SEL_68] (rows=138600 width=581) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_67] (rows=138600 width=581) + predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_57] (rows=31836679 width=110) + Conds:RS_77._col0=RS_61._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_59] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter 
Operator [FIL_75] (rows=286549727 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_74] + Group By Operator [GBY_73] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] + Group By Operator [GBY_71] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_70] (rows=138600 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_68] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_66] + Group By Operator [GBY_65] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_64] + Group By Operator [GBY_63] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_62] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_60] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out new file mode 100644 index 0000000000..a307b667f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out @@ -0,0 +1,147 @@ +PREHOOK: query: explain +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / 
inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_92] + Limit [LIM_91] (rows=100 width=216) + Number of rows:100 + Select Operator [SEL_90] (rows=8556 width=216) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] + Filter Operator [FIL_88] (rows=8556 width=216) + predicate:CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END + Group By Operator [GBY_87] (rows=17112 width=216) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=17112 width=216) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 + Select Operator [SEL_20] (rows=463966 width=208) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_76] (rows=463966 width=208) + Conds:RS_17._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=8116 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_84] (rows=8116 width=98) + predicate:CAST( d_date AS 
TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' + TableScan [TS_14] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=4175973 width=206) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_75] (rows=4175973 width=206) + Conds:RS_10._col2=RS_83._col0(Inner),Output:["_col0","_col3","_col5","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=27 width=104) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_74] (rows=4175973 width=110) + Conds:RS_78._col1=RS_81._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + PartitionCols:_col1 + Select Operator [SEL_77] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + PartitionCols:_col0 + Select Operator [SEL_80] (rows=51333 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_79] (rows=51333 width=215) + predicate:i_current_price BETWEEN 0.99 AND 1.49 + TableScan [TS_2] (rows=462000 width=215) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out new file mode 100644 index 0000000000..cd3c0cc431 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: explain +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_64] + Limit [LIM_63] (rows=100 width=397) + Number of rows:100 + Select Operator [SEL_62] (rows=32730675 width=397) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_61] + Select Operator [SEL_60] (rows=32730675 width=397) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_59] (rows=32730675 width=413) + Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_14] (rows=32730675 width=413) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col2)","count(_col2)"],keys:_col5, _col6, _col7, _col8, 0L + Merge Join Operator [MERGEJOIN_51] (rows=6546135 width=391) + Conds:RS_10._col1=RS_58._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_5] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_50] (rows=6546135 width=6) + Conds:RS_53._col0=RS_56._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=37584000 width=11) + 
Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_54] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_2] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out new file mode 100644 index 0000000000..c7b1c9a6bb --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out @@ -0,0 +1,540 @@ +Warning: Shuffle Join MERGEJOIN[445][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[448][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[449][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 25' is a cross product +PREHOOK: query: explain +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + 
best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + 
group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 33 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 15 <- Reducer 29 (BROADCAST_EDGE) +Map 37 <- Reducer 7 (BROADCAST_EDGE) +Map 39 <- Reducer 36 (BROADCAST_EDGE) +Map 41 <- Reducer 14 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) +Map 42 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 41 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 26 (ONE_TO_ONE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 34 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 38 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE), Reducer 43 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 21 (ONE_TO_ONE_EDGE) +Reducer 30 <- Map 28 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 31 <- Map 40 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Reducer 31 (SIMPLE_EDGE) +Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 37 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 32 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) +Reducer 43 <- Map 42 (SIMPLE_EDGE) +Reducer 6 <- Union 
5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_543] + Limit [LIM_542] (rows=1 width=112) + Number of rows:100 + Group By Operator [GBY_541] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Union 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] + Reduce Output Operator [RS_462] + Group By Operator [GBY_461] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col0)"] + Select Operator [SEL_459] (rows=52 width=112) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_458] (rows=52 width=2) + Conds:RS_200._col1=RS_549._col0(Inner),Output:["_col3","_col4"] + <-Reducer 34 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_549] + PartitionCols:_col0 + Select Operator [SEL_548] (rows=745 width=4) + Output:["_col0"] + Filter Operator [FIL_547] (rows=745 width=12) + predicate:(_col1 > 4L) + Group By Operator [GBY_546] (rows=2235 width=12) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_190] + PartitionCols:_col0 + Group By Operator [GBY_87] (rows=2235 width=12) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col4 + Merge Join Operator [MERGEJOIN_439] (rows=19646398 width=4) + Conds:RS_83._col1=RS_491._col0(Inner),Output:["_col4"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_491] + PartitionCols:_col0 + Select Operator [SEL_490] (rows=462000 width=188) + Output:["_col0"] + TableScan [TS_78] (rows=462000 width=4) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_438] (rows=19646398 width=4) + Conds:RS_489._col0=RS_479._col0(Inner),Output:["_col1"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_479] + PartitionCols:_col0 + Select Operator [SEL_476] (rows=2609 
width=4) + Output:["_col0"] + Filter Operator [FIL_475] (rows=2609 width=8) + predicate:(d_year) IN (1999, 2000, 2001, 2002) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_489] + PartitionCols:_col0 + Select Operator [SEL_488] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_487] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_72] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_486] + Group By Operator [GBY_485] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_484] + Group By Operator [GBY_482] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_480] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_476] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_200] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_450] (rows=3941102 width=118) + Conds:RS_197._col2=RS_576._col0(Inner),Output:["_col1","_col3","_col4"] + <-Reducer 10 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_197] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_440] (rows=3941102 width=122) + Conds:RS_557._col0=RS_467._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_467] + 
PartitionCols:_col0 + Select Operator [SEL_464] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_463] (rows=50 width=12) + predicate:((d_moy = 1) and (d_year = 1999)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_557] + PartitionCols:_col0 + Select Operator [SEL_556] (rows=143930993 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_555] (rows=143930993 width=127) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_201_item_i_item_sk_min) AND DynamicValue(RS_201_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_201_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_195_date_dim_d_date_sk_min) AND DynamicValue(RS_195_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_195_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_102] (rows=144002668 width=127) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_545] + Group By Operator [GBY_544] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_472] + Group By Operator [GBY_470] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_468] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_464] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_554] + Group By Operator [GBY_553] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_552] + Group By Operator [GBY_551] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_550] (rows=745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_548] + <-Reducer 26 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_576] + PartitionCols:_col0 + Group By Operator [GBY_575] (rows=235937 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_171] + PartitionCols:_col0 + Group By Operator [GBY_170] (rows=235937 width=3) + Output:["_col0"],keys:_col2 + Select Operator [SEL_169] (rows=471875 width=227) + Output:["_col2"] + Filter Operator [FIL_168] (rows=471875 width=227) + predicate:(_col3 > _col1) + Merge Join Operator [MERGEJOIN_449] (rows=1415626 width=227) + Conds:(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_165] + Merge Join Operator [MERGEJOIN_448] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_563] + Select Operator [SEL_562] (rows=1 width=8) + Filter Operator [FIL_561] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_560] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_559] (rows=1 width=8) + Group By Operator [GBY_558] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_520] + Group By Operator [GBY_516] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_512] (rows=50562 width=112) + Output:["_col0"] + Group By Operator [GBY_509] (rows=50562 
width=112) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=455058 width=112) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col1 + Merge Join Operator [MERGEJOIN_436] (rows=18762463 width=112) + Conds:RS_508._col0=RS_477._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_477] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_476] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_508] + PartitionCols:_col0 + Select Operator [SEL_507] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_506] (rows=525327388 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_6] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_505] + Group By Operator [GBY_504] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_483] + Group By Operator [GBY_481] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_478] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_476] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_566] + Select Operator [SEL_565] (rows=1 width=112) + 
Output:["_col0"] + Group By Operator [GBY_564] (rows=1 width=112) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_521] + Group By Operator [GBY_517] (rows=1 width=112) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_513] (rows=50562 width=112) + Output:["_col1"] + Please refer to the previous Group By Operator [GBY_509] + <-Reducer 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_574] + Group By Operator [GBY_573] (rows=1415626 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_572] + PartitionCols:_col0 + Group By Operator [GBY_571] (rows=550080312 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_570] (rows=550080312 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_569] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_197_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_197_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_197_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_154] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_568] + Group By Operator [GBY_567] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_414] + Group By Operator [GBY_413] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_412] (rows=3941102 width=7) + Output:["_col0"] + Please 
refer to the previous Merge Join Operator [MERGEJOIN_440] + <-Reducer 4 [CONTAINS] + Reduce Output Operator [RS_457] + Group By Operator [GBY_456] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col0)"] + Select Operator [SEL_454] (rows=102 width=112) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_453] (rows=102 width=1) + Conds:RS_98._col2=RS_495._col0(Inner),Output:["_col3","_col4"] + <-Reducer 32 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_495] + PartitionCols:_col0 + Select Operator [SEL_494] (rows=745 width=4) + Output:["_col0"] + Filter Operator [FIL_493] (rows=745 width=12) + predicate:(_col1 > 4L) + Group By Operator [GBY_492] (rows=2235 width=12) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col0 + Please refer to the previous Group By Operator [GBY_87] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_447] (rows=7751875 width=98) + Conds:RS_95._col1=RS_540._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_95] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_435] (rows=7751875 width=101) + Conds:RS_503._col0=RS_465._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_465] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_464] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_503] + PartitionCols:_col0 + Select Operator [SEL_502] (rows=285117831 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_501] (rows=285117831 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_99_item_i_item_sk_min) AND DynamicValue(RS_99_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_99_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_93_date_dim_d_date_sk_min) AND 
DynamicValue(RS_93_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_93_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_500] + Group By Operator [GBY_499] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + FORWARD [RS_498] + Group By Operator [GBY_497] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_496] (rows=745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_494] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_474] + Group By Operator [GBY_473] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_471] + Group By Operator [GBY_469] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_466] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_464] + <-Reducer 21 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_540] + PartitionCols:_col0 + Group By Operator [GBY_539] (rows=235937 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=235937 width=3) + Output:["_col0"],keys:_col2 + Select Operator [SEL_67] (rows=471875 
width=227) + Output:["_col2"] + Filter Operator [FIL_66] (rows=471875 width=227) + predicate:(_col3 > _col1) + Merge Join Operator [MERGEJOIN_446] (rows=1415626 width=227) + Conds:(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 19 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_63] + Merge Join Operator [MERGEJOIN_445] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_527] + Select Operator [SEL_526] (rows=1 width=8) + Filter Operator [FIL_525] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_524] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_523] (rows=1 width=8) + Group By Operator [GBY_522] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_518] + Group By Operator [GBY_514] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_510] (rows=50562 width=112) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_509] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_530] + Select Operator [SEL_529] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_528] (rows=1 width=112) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_519] + Group By Operator [GBY_515] (rows=1 width=112) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_511] (rows=50562 width=112) + Output:["_col1"] + Please refer to the previous Group By Operator [GBY_509] + <-Reducer 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_538] + Group By Operator [GBY_537] (rows=1415626 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_536] + PartitionCols:_col0 + Group By Operator 
[GBY_535] (rows=550080312 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_534] (rows=550080312 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_533] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_95_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_95_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_95_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_52] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_339] (rows=7751875 width=6) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_435] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out new file mode 100644 index 0000000000..e4237f14c0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out @@ -0,0 +1,351 @@ +Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns 
+ ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: 
default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 8 <- Reducer 14 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 21 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 23 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 16 <- Map 20 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 23 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 21 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_92] + Select Operator [SEL_91] (rows=27708538 width=380) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_90] (rows=27708538 width=492) + predicate:(_col3 > _col4) + Merge Join Operator [MERGEJOIN_291] (rows=83125614 width=492) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_332] + Select Operator [SEL_331] (rows=83125614 width=380) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_330] (rows=83125614 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col7, _col8 + Select 
Operator [SEL_329] (rows=309360422049 width=843) + Output:["_col1","_col7","_col8","_col9"] + Group By Operator [GBY_328] (rows=309360422049 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_35] (rows=309360422049 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col4, _col11, _col12, _col0, _col5, _col7, _col8, _col9, _col10 + Merge Join Operator [MERGEJOIN_285] (rows=309360422049 width=843) + Conds:RS_294._col1, _col2=RS_32._col3, _col10(Inner),Output:["_col0","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col1, _col2 + Select Operator [SEL_293] (rows=40000000 width=359) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_292] (rows=40000000 width=272) + predicate:(ca_zip is not null and upper(ca_country) is not null) + TableScan [TS_0] (rows=40000000 width=272) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_state","ca_zip","ca_country"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col3, _col10 + Select Operator [SEL_30] (rows=78430601 width=905) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_284] (rows=78430601 width=905) + Conds:RS_27._col1=RS_326._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col12","_col13","_col14"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_326] + PartitionCols:_col0 + Select Operator [SEL_325] (rows=80000000 width=276) + Output:["_col0","_col1","_col2","_col3"] + Filter 
Operator [FIL_324] (rows=80000000 width=276) + predicate:c_birth_country is not null + TableScan [TS_21] (rows=80000000 width=276) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_283] (rows=78430601 width=636) + Conds:RS_24._col0=RS_312._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=7000 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_308] (rows=7000 width=384) + predicate:(i_color = 'orchid') + TableScan [TS_18] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=537799796 width=377) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_282] (rows=537799796 width=377) + Conds:RS_14._col0, _col3=RS_322._col0, _col1(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0, _col1 + Select Operator [SEL_321] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_9] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_281] (rows=385681992 width=379) + Conds:RS_320._col2=RS_298._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col0 + Select Operator [SEL_297] (rows=155 
width=267) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_296] (rows=155 width=271) + predicate:((s_market_id = 7) and s_zip is not null) + TableScan [TS_6] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + PartitionCols:_col2 + Select Operator [SEL_319] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_318] (rows=525333486 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_12_store_s_store_sk_min) AND DynamicValue(RS_12_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_12_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_307] + Group By Operator [GBY_306] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] + Group By Operator [GBY_302] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_299] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_297] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + Group By Operator [GBY_314] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_313] (rows=7000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_310] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Select Operator [SEL_343] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_342] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_340] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] + Select Operator [SEL_339] (rows=2121289008973 width=932) + Output:["_col10"] + Group By Operator [GBY_338] (rows=2121289008973 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Group By Operator [GBY_78] (rows=2121289008973 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col0)"],keys:_col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 + Merge Join Operator [MERGEJOIN_290] (rows=2121289008973 width=932) + Conds:RS_74._col3, _col11=RS_295._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12"] + 
<-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col1, _col2 + Please refer to the previous Select Operator [SEL_293] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col3, _col11 + Select Operator [SEL_70] (rows=537799796 width=1023) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_289] (rows=537799796 width=1023) + Conds:RS_67._col4=RS_311._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col15"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + PartitionCols:_col0 + Select Operator [SEL_309] (rows=462000 width=384) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Please refer to the previous TableScan [TS_18] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_288] (rows=537799796 width=646) + Conds:RS_327._col0=RS_65._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_325] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col1 + Select Operator [SEL_61] (rows=537799796 width=377) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_287] (rows=537799796 width=377) + Conds:RS_58._col0, _col3=RS_323._col0, _col1(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_286] (rows=385681992 width=379) + Conds:RS_337._col2=RS_300._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_300] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_297] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] + PartitionCols:_col2 + Select Operator [SEL_336] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_335] (rows=525333486 width=122) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_56_store_s_store_sk_min) AND DynamicValue(RS_56_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_56_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_47] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_334] + Group By Operator [GBY_333] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + Group By Operator [GBY_303] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_297] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out new file mode 100644 index 0000000000..8f27cced27 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out @@ -0,0 +1,273 @@ +PREHOOK: query: explain +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + 
,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + 
,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 5 <- Reducer 15 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_249] + Limit [LIM_248] (rows=100 width=808) + Number of rows:100 + Select Operator [SEL_247] (rows=88265283 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + Group By Operator [GBY_245] (rows=88265283 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_47] (rows=88265283 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col1, _col2, _col7, _col8 + Top N Key Operator [TNK_98] (rows=88265283 width=776) + keys:_col1, _col2, _col7, 
_col8,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_212] (rows=88265283 width=776) + Conds:RS_214._col0=RS_44._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=88265283 width=496) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_211] (rows=88265283 width=496) + Conds:RS_39._col1=RS_244._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col8","_col9"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col0 + Select Operator [SEL_243] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_34] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_210] (rows=88265283 width=311) + Conds:RS_36._col4=RS_224._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_217] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + TableScan [TS_18] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col4 + Select Operator [SEL_30] (rows=462456048 width=341) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_209] (rows=462456048 width=341) + 
Conds:RS_27._col4=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_216] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + Please refer to the previous TableScan [TS_18] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_208] (rows=2637748473 width=350) + Conds:RS_24._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_215] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 2000)) + Please refer to the previous TableScan [TS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=96331893945 width=355) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_207] (rows=96331893945 width=355) + Conds:RS_14._col7, _col8=RS_242._col2, _col1(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col2, _col1 + Select Operator [SEL_241] (rows=285117831 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_240] (rows=285117831 width=123) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_37_d3_d_date_sk_min) AND DynamicValue(RS_37_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_37_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_8] (rows=287989836 width=123) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_225] (rows=351 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_220] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col7, _col8 + Merge Join Operator [MERGEJOIN_206] (rows=478292911 width=241) + Conds:RS_234._col1, _col2, _col4=RS_237._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_236] (rows=53632139 width=123) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_235] (rows=53632139 width=123) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_5] (rows=57591150 width=123) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_233] (rows=501694138 width=126) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_232] (rows=501694138 width=126) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_222] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_218] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out new file mode 100644 index 0000000000..2578ead416 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out @@ -0,0 +1,173 @@ +PREHOOK: query: explain +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: 
Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_129] + Limit [LIM_128] (rows=100 width=444) + Number of rows:100 + Select Operator [SEL_127] (rows=310774 width=444) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=310774 width=444) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_124] (rows=310774 width=476) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col5)","count(_col5)","sum(_col4)","count(_col4)"],keys:_col8 + Top N Key Operator [TNK_55] (rows=809521 width=100) + keys:_col8,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_99] (rows=809521 width=100) + Conds:RS_24._col0=RS_123._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_19] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_98] (rows=809521 width=4) + Conds:RS_21._col1=RS_121._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0 + Select Operator [SEL_120] (rows=2300 width=4) + Output:["_col0"] + Filter Operator [FIL_119] (rows=2300 width=174) + predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) + TableScan [TS_16] (rows=2300 width=174) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_15] (rows=809521 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_97] (rows=809521 width=4) + 
Conds:RS_12._col0=RS_110._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_108] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_96] (rows=2283326 width=135) + Conds:RS_118._col1=RS_102._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=14776 width=4) + Output:["_col0"] + Filter Operator [FIL_100] (rows=14776 width=268) + predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W')) + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + PartitionCols:_col1 + Select Operator [SEL_117] (rows=283691050 width=354) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_116] (rows=283691050 width=354) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_10_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_10_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_10_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_promo_sk is not null and cs_sold_date_sk 
is not null) + TableScan [TS_0] (rows=287989836 width=354) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_111] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_103] (rows=14776 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_101] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out new file mode 100644 index 0000000000..aee662ceb6 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out @@ -0,0 +1,191 @@ +PREHOOK: query: explain +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + 
from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_135] + Limit [LIM_134] (rows=100 width=538) + Number of rows:100 + Select Operator [SEL_133] (rows=4281825 width=538) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + Select Operator [SEL_131] (rows=4281825 width=538) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_130] (rows=4281825 width=570) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_29] (rows=4281825 width=570) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_56] (rows=1427275 width=186) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_27] (rows=1427275 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + 
Merge Join Operator [MERGEJOIN_100] (rows=1427275 width=186) + Conds:RS_24._col0=RS_129._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col7","_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_19] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_99] (rows=1427275 width=90) + Conds:RS_21._col1=RS_119._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col0 + Select Operator [SEL_118] (rows=209 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_117] (rows=209 width=90) + predicate:(s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') + TableScan [TS_16] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_15] (rows=1441779 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_98] (rows=1441779 width=4) + Conds:RS_12._col0=RS_111._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] + PartitionCols:_col0 + Select Operator [SEL_110] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_109] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_97] (rows=4037920 width=4) + Conds:RS_127._col2=RS_103._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_103] + PartitionCols:_col0 + Select Operator [SEL_102] (rows=14776 width=4) + Output:["_col0"] + Filter Operator [FIL_101] (rows=14776 width=268) + predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U')) + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col2 + Select Operator [SEL_126] (rows=501690006 width=340) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_125] (rows=501690006 width=340) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_10_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_10_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_10_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=340) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 
[CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_112] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_110] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_120] (rows=209 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_118] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_106] + Group By Operator [GBY_105] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_104] (rows=14776 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_102] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out new file mode 100644 index 0000000000..c7fd970eae --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out @@ -0,0 +1,296 @@ +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product 
+Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 
and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost 
between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 1 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 1 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 + File Output Operator [FS_59] + Limit [LIM_58] (rows=1 width=768) + Number of rows:100 + Select Operator [SEL_57] (rows=1 width=768) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + Merge Join Operator [MERGEJOIN_106] (rows=1 width=768) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 18 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_166] + Select Operator [SEL_165] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_164] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_161] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_118] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_112] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) + TableScan [TS_0] (rows=575995635 width=324) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_54] + Merge Join Operator [MERGEJOIN_105] (rows=1 width=640) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_160] + Select Operator [SEL_159] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_158] (rows=1 width=128) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_155] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Group By Operator [GBY_123] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_117] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_111] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) + Please refer to the previous TableScan [TS_0] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Merge Join Operator [MERGEJOIN_104] (rows=1 width=512) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_154] + Select Operator [SEL_153] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_152] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_149] (rows=7618989 width=226) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_122] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_116] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_110] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) + Please refer to the previous TableScan [TS_0] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_103] (rows=1 width=384) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_148] + Select Operator [SEL_147] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_146] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_143] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col0 + Group By Operator [GBY_121] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_115] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_109] (rows=15237978 width=324) 
+ predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) + Please refer to the previous TableScan [TS_0] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_45] + Merge Join Operator [MERGEJOIN_102] (rows=1 width=256) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_142] + Select Operator [SEL_141] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_140] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_137] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Group By Operator [GBY_120] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_114] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_108] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) + Please refer to the previous TableScan [TS_0] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_136] + Select Operator [SEL_135] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_134] (rows=1 width=128) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Group By Operator [GBY_132] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_131] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_119] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_113] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_107] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) + Please refer to the previous TableScan [TS_0] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out new file mode 100644 index 0000000000..a7a6555937 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out @@ -0,0 +1,272 @@ +PREHOOK: query: explain +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and 
sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 13 <- Reducer 17 (BROADCAST_EDGE) +Map 5 <- Reducer 15 (BROADCAST_EDGE) +Reducer 10 <- Map 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_249] + Limit [LIM_248] (rows=100 width=496) + Number of rows:100 + Select Operator [SEL_247] (rows=7094732 width=496) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + Group By Operator [GBY_245] (rows=7094732 width=496) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_47] (rows=7094732 width=496) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col1, _col2, _col7, _col8 + Top N Key Operator [TNK_98] (rows=7094732 width=478) + keys:_col1, _col2, _col7, _col8,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_212] (rows=7094732 width=478) + Conds:RS_214._col0=RS_44._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=462000 width=288) + 
Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=7094732 width=198) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_211] (rows=7094732 width=198) + Conds:RS_39._col1=RS_244._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col8","_col9"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col0 + Select Operator [SEL_243] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_34] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_210] (rows=7094732 width=10) + Conds:RS_36._col4=RS_234._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_232] (rows=1957 width=8) + predicate:(d_year) IN (1999, 2000, 2001) + TableScan [TS_31] (rows=73049 width=8) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col4 + Select Operator [SEL_30] (rows=264825249 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_209] (rows=264825249 width=23) + Conds:RS_27._col4=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_216] (rows=201 width=12) + predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7) + TableScan 
[TS_18] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_208] (rows=2637748473 width=27) + Conds:RS_24._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + PartitionCols:_col0 + Select Operator [SEL_217] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_215] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 1999)) + Please refer to the previous TableScan [TS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=96331893945 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_207] (rows=96331893945 width=31) + Conds:RS_14._col7, _col8=RS_242._col2, _col1(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col2, _col1 + Select Operator [SEL_241] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_240] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_37_d3_d_date_sk_min) AND DynamicValue(RS_37_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_37_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_8] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_237] + Group By Operator [GBY_236] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_235] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_233] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col7, _col8 + Merge Join Operator [MERGEJOIN_206] (rows=478292911 width=31) + Conds:RS_228._col1, _col2, _col4=RS_231._col1, _col2, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_230] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_229] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_5] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_227] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_226] (rows=501694138 width=23) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] 
+ Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_220] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_217] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query3.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query3.q.out new file mode 100644 index 0000000000..0cca24c23c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query3.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: 
default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_77] + Limit [LIM_76] (rows=100 width=220) + Number of rows:100 + Select Operator [SEL_75] (rows=274400 width=220) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] + Group By Operator [GBY_73] (rows=274400 width=220) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_16] (rows=274400 width=220) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col7, _col4, _col5 + Merge Join Operator [MERGEJOIN_53] (rows=589741 width=108) + Conds:RS_12._col0=RS_64._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_64] + PartitionCols:_col0 + Select Operator [SEL_63] (rows=5619 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_62] (rows=5619 width=12) + predicate:(d_moy = 12) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_52] (rows=7666836 width=104) + Conds:RS_72._col1=RS_56._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator 
[SEL_55] (rows=669 width=107) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_54] (rows=669 width=111) + predicate:(i_manufact_id = 436) + TableScan [TS_3] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] + PartitionCols:_col1 + Select Operator [SEL_71] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_70] (rows=550076554 width=114) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_10_item_i_item_sk_min) AND DynamicValue(RS_10_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_10_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_dt_d_date_sk_min) AND DynamicValue(RS_13_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_dt_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_61] + Group By Operator [GBY_60] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_59] + Group By Operator [GBY_58] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_57] (rows=669 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_55] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] 
vectorized + PARTITION_ONLY_SHUFFLE [RS_67] + Group By Operator [GBY_66] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_65] (rows=5619 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_63] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out new file mode 100644 index 0000000000..7924edd724 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: explain +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: 
explain +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 5 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_210] + Limit [LIM_209] (rows=100 width=942) + Number of rows:100 + Select Operator [SEL_208] (rows=691171 width=942) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_63] + Select Operator [SEL_62] (rows=691171 width=942) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_177] (rows=691171 width=942) + Conds:RS_59._col0=RS_60._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_171] (rows=1568628 width=834) + Conds:RS_180._col2=RS_187._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=784314 width=4) + Output:["_col0"] + Filter Operator [FIL_181] (rows=784314 width=90) + predicate:(ca_state = 'IL') + TableScan [TS_3] (rows=40000000 width=90) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] + PartitionCols:_col2 + Select Operator [SEL_179] (rows=80000000 width=849) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_178] (rows=80000000 width=849) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=849) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=704993 width=227) + Output:["_col0","_col2"] + Filter Operator [FIL_54] (rows=704993 width=227) + predicate:(_col2 > _col3) + Merge Join Operator [MERGEJOIN_176] (rows=2114980 width=227) + Conds:RS_202._col1=RS_207._col1(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] + PartitionCols:_col1 + Select Operator [SEL_206] (rows=6 width=198) + Output:["_col0","_col1"] + Group By Operator [GBY_205] (rows=6 width=206) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 + Select Operator [SEL_204] (rows=2537976 width=201) + Output:["_col0","_col2"] + Group By Operator [GBY_203] (rows=2537976 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_42] (rows=3923529 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 + Merge Join Operator [MERGEJOIN_175] (rows=3923529 width=184) + 
Conds:RS_38._col2=RS_189._col0(Inner),Output:["_col1","_col3","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_183] (rows=40000000 width=90) + predicate:ca_state is not null + Please refer to the previous TableScan [TS_3] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_174] (rows=3923529 width=101) + Conds:RS_195._col0=RS_199._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=13130761 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_191] (rows=13130761 width=118) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) + TableScan [TS_6] (rows=14398467 width=118) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] + PartitionCols:_col0 + Select Operator [SEL_197] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_196] (rows=652 width=8) + predicate:(d_year = 2002) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_202] + PartitionCols:_col1 + Select Operator [SEL_201] (rows=2114980 width=201) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_200] (rows=2114980 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=3746772 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 + Merge Join Operator [MERGEJOIN_173] 
(rows=3746772 width=184) + Conds:RS_18._col2=RS_188._col0(Inner),Output:["_col1","_col3","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_188] + PartitionCols:_col0 + Select Operator [SEL_185] (rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_182] (rows=40000000 width=90) + predicate:ca_state is not null + Please refer to the previous TableScan [TS_3] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_172] (rows=3746772 width=101) + Conds:RS_194._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_192] (rows=12539215 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_190] (rows=12539215 width=118) + predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_197] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out new file mode 100644 index 0000000000..20b52aaded --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out @@ -0,0 +1,498 @@ +PREHOOK: query: explain +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + 
,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select /* tt */ + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + 
,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE) +Map 33 <- Reducer 15 (BROADCAST_EDGE) +Map 34 <- Reducer 19 (BROADCAST_EDGE) +Map 35 <- Reducer 23 (BROADCAST_EDGE) +Map 36 <- Reducer 27 (BROADCAST_EDGE) +Map 37 <- Reducer 31 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 13 <- Map 32 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 17 <- Map 32 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 20 <- Map 10 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 21 <- Map 32 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 10 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 25 <- Map 32 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 10 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 29 <- Map 32 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 32 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 26 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Reducer 9 <- Reducer 30 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 + File Output Operator [FS_140] + Select Operator [SEL_139] (rows=110 width=550) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_138] (rows=110 width=670) + predicate:CASE WHEN (_col6) THEN (CASE WHEN (_col7) THEN (((_col10 / _col2) > _col5)) ELSE (_col8) END) ELSE (CASE WHEN (_col7) THEN (((_col10 / _col2) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_465] (rows=220 width=670) + Conds:RS_135._col1=RS_555._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Reducer 30 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_555] + PartitionCols:_col0 + Group By Operator [GBY_554] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_132] + PartitionCols:_col0 + Group By Operator [GBY_131] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_460] (rows=10246882 width=209) + Conds:RS_127._col1=RS_514._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_514] + PartitionCols:_col0 + Select Operator [SEL_508] (rows=40000000 width=102) + Output:["_col0","_col1"] + Filter Operator [FIL_507] (rows=40000000 width=102) + predicate:ca_county is not null + TableScan [TS_6] (rows=40000000 width=102) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_127] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_459] (rows=10246882 width=115) + Conds:RS_553._col0=RS_488._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_488] + PartitionCols:_col0 + Select Operator [SEL_477] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_471] (rows=130 width=12) + predicate:((d_qoy = 3) and (d_year = 2000)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 37 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_553] + PartitionCols:_col0 + Select Operator [SEL_552] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_551] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_125_date_dim_d_date_sk_min) AND DynamicValue(RS_125_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_125_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_115] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_550] + Group By Operator [GBY_549] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_501] + Group By Operator [GBY_495] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_489] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_477] + <-Reducer 8 [ONE_TO_ONE_EDGE] + FORWARD [RS_135] + PartitionCols:_col1 + Select Operator [SEL_114] (rows=220 width=656) + Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_113] (rows=220 width=570) + predicate:CASE WHEN (_col5) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > _col1)) ELSE (_col6) END) ELSE (CASE WHEN (_col9) THEN (((_col11 / _col8) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_464] (rows=440 width=570) + Conds:RS_110._col0=RS_548._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col12"] + <-Reducer 26 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_548] + 
PartitionCols:_col0 + Select Operator [SEL_547] (rows=440 width=214) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_546] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col0 + Group By Operator [GBY_103] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_458] (rows=10246882 width=209) + Conds:RS_99._col1=RS_513._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_513] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_508] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_457] (rows=10246882 width=115) + Conds:RS_545._col0=RS_486._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_486] + PartitionCols:_col0 + Select Operator [SEL_476] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_470] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000)) + Please refer to the previous TableScan [TS_3] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_545] + PartitionCols:_col0 + Select Operator [SEL_544] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_543] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_87] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_542] + Group By Operator [GBY_541] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_500] + Group By Operator [GBY_494] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_487] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_476] + <-Reducer 7 [ONE_TO_ONE_EDGE] + FORWARD [RS_110] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_463] (rows=440 width=454) + Conds:RS_107._col0=RS_540._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_540] + PartitionCols:_col0 + Select Operator [SEL_539] (rows=440 width=214) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_538] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col0 + Group By Operator [GBY_83] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_456] (rows=10246882 width=209) + Conds:RS_79._col1=RS_512._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_512] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_508] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_455] (rows=10246882 width=115) + Conds:RS_537._col0=RS_484._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_484] + PartitionCols:_col0 + Select Operator [SEL_475] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_469] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2000)) + Please refer to the previous 
TableScan [TS_3] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] + PartitionCols:_col0 + Select Operator [SEL_536] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_535] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_77_date_dim_d_date_sk_min) AND DynamicValue(RS_77_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_77_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_67] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_534] + Group By Operator [GBY_533] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_499] + Group By Operator [GBY_493] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_485] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_475] + <-Reducer 6 [ONE_TO_ONE_EDGE] + FORWARD [RS_107] + PartitionCols:_col0 + Select Operator [SEL_66] (rows=1605 width=338) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_462] (rows=1605 width=442) + Conds:RS_63._col0=RS_532._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7"] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_532] + PartitionCols:_col0 + Group By Operator [GBY_531] (rows=1605 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0 + Group By Operator [GBY_56] (rows=33705 width=210) + 
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_454] (rows=37399561 width=139) + Conds:RS_52._col1=RS_511._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_511] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_508] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_453] (rows=37399561 width=42) + Conds:RS_530._col0=RS_482._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_482] + PartitionCols:_col0 + Select Operator [SEL_474] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_468] (rows=130 width=12) + predicate:((d_qoy = 3) and (d_year = 2000)) + Please refer to the previous TableScan [TS_3] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_530] + PartitionCols:_col0 + Select Operator [SEL_529] (rows=525327191 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_528] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_40] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_527] + Group By Operator [GBY_526] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_498] + Group By Operator [GBY_492] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + 
Select Operator [SEL_483] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_474] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_63] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_461] (rows=1605 width=330) + Conds:RS_517._col0=RS_525._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_525] + PartitionCols:_col0 + Select Operator [SEL_524] (rows=1605 width=214) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_523] (rows=1605 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Group By Operator [GBY_36] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_452] (rows=37399561 width=139) + Conds:RS_32._col1=RS_510._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_510] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_508] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_451] (rows=37399561 width=42) + Conds:RS_522._col0=RS_480._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_480] + PartitionCols:_col0 + Select Operator [SEL_473] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_467] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2000)) + Please refer to the previous TableScan [TS_3] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_522] + PartitionCols:_col0 + Select Operator [SEL_521] (rows=525327191 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_520] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_20] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_519] + Group By Operator [GBY_518] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] + Group By Operator [GBY_491] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_481] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_473] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_517] + PartitionCols:_col0 + Select Operator [SEL_516] (rows=1605 width=214) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_515] (rows=1605 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_450] (rows=37399561 width=139) + Conds:RS_12._col1=RS_509._col0(Inner),Output:["_col2","_col5"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_509] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_508] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_449] (rows=37399561 width=42) + Conds:RS_506._col0=RS_478._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_478] + PartitionCols:_col0 + Select Operator [SEL_472] (rows=130 width=4) + 
Output:["_col0"] + Filter Operator [FIL_466] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000)) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_506] + PartitionCols:_col0 + Select Operator [SEL_505] (rows=525327191 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_504] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_503] + Group By Operator [GBY_502] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + Group By Operator [GBY_490] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_479] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_472] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query32.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query32.q.out new file mode 100644 index 0000000000..d909c0254d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query32.q.out @@ -0,0 +1,210 @@ +PREHOOK: query: explain +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + 
(cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = cs_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = cs_sold_date_sk + ) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 11 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_141] + Limit [LIM_140] (rows=1 width=112) + Number of rows:100 + Group By Operator [GBY_139] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_36] + Group By Operator [GBY_35] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col2)"] + Select Operator [SEL_34] (rows=2478 width=112) + Output:["_col2"] + Filter Operator [FIL_33] (rows=2478 width=112) + predicate:(_col2 > _col5) + Merge Join Operator [MERGEJOIN_104] (rows=7434 width=112) + Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_101] (rows=31836679 width=110) + Conds:RS_128._col0=RS_107._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_105] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_126] (rows=286549727 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_120] (rows=669 width=4) + Output:["_col0"] + Select Operator [SEL_118] (rows=669 width=4) + Output:["_col0"] + Filter Operator [FIL_117] (rows=669 width=7) + predicate:(i_manufact_id = 269) + TableScan [TS_20] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 10 [ONE_TO_ONE_EDGE] + FORWARD [RS_31] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_103] (rows=97 width=116) + Conds:RS_138._col0=RS_119._col0(Inner),Output:["_col1","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_119] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_118] + <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=6951 width=116) + Output:["_col0","_col1"] + Group By Operator [GBY_136] (rows=6951 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=97314 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Merge Join Operator [MERGEJOIN_102] (rows=31836679 width=110) + Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_106] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + PartitionCols:_col0 + Select Operator [SEL_134] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_133] (rows=286549727 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN 
DynamicValue(RS_30_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_30_catalog_sales_cs_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_30_catalog_sales_cs_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_125] + Please refer to the previous Group By Operator [GBY_123] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_110] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 5 [BROADCAST_EDGE] vectorized + BROADCAST [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_89] (rows=31836679 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_101] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out new file mode 100644 index 0000000000..c82c41575d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out @@ -0,0 +1,450 @@ +PREHOOK: query: explain +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: 
Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 14 <- Reducer 18 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) +Map 29 <- Reducer 21 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) +Map 30 <- Reducer 24 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Map 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_372] + Limit [LIM_371] (rows=59 width=115) + Number of rows:100 + Select Operator [SEL_370] (rows=59 width=115) + Output:["_col0","_col1"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_369] + Group By Operator [GBY_368] (rows=59 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_392] + PartitionCols:_col0 + Group By Operator [GBY_391] (rows=59 width=115) + 
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_390] (rows=19 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col0 + Group By Operator [GBY_108] (rows=19 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_308] (rows=11364 width=3) + Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_297] (rows=461514 width=7) + Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col1 + Select Operator [SEL_322] (rows=460848 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_321] (rows=460848 width=7) + predicate:i_manufact_id is not null + TableScan [TS_0] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_329] + PartitionCols:_col0 + Group By Operator [GBY_328] (rows=692 width=3) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + PartitionCols:_col0 + Group By Operator [GBY_326] (rows=692 width=3) + Output:["_col0"],keys:i_manufact_id + Select Operator [SEL_325] (rows=46085 width=93) + Output:["i_manufact_id"] + Filter Operator [FIL_324] (rows=46085 width=93) + predicate:((i_category = 'Books') and i_manufact_id is not null) + TableScan [TS_3] (rows=462000 width=93) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_manufact_id"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col2 + Select Operator [SEL_100] (rows=788222 width=110) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_305] (rows=788222 width=110) + Conds:RS_97._col2=RS_352._col0(Inner),Output:["_col1","_col3"] + <-Map 25 
[SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_352] + PartitionCols:_col0 + Select Operator [SEL_347] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_346] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -6) + TableScan [TS_16] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_304] (rows=3941109 width=118) + Conds:RS_389._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_330] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_389] + PartitionCols:_col0 + Select Operator [SEL_388] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_387] (rows=143931246 width=123) + predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_98_customer_address_ca_address_sk_min) AND DynamicValue(RS_98_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_98_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_85] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_384] + Group 
By Operator [GBY_383] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_337] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_331] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_386] + Group By Operator [GBY_385] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_359] + Group By Operator [GBY_356] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_353] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_367] + PartitionCols:_col0 + Group By Operator [GBY_366] (rows=59 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_365] (rows=64 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=64 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_306] (rows=41476 width=3) + Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE 
[RS_30] + PartitionCols:_col2 + Select Operator [SEL_25] (rows=2876890 width=4) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_299] (rows=2876890 width=4) + Conds:RS_22._col2=RS_348._col0(Inner),Output:["_col1","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_347] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_298] (rows=14384447 width=4) + Conds:RS_364._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_364] + PartitionCols:_col0 + Select Operator [SEL_363] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_362] (rows=525327191 width=118) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_23_customer_address_ca_address_sk_min) AND DynamicValue(RS_23_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_23_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_10] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_345] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Group 
By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_333] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_331] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_361] + Group By Operator [GBY_360] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_357] + Group By Operator [GBY_354] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_382] + PartitionCols:_col0 + Group By Operator [GBY_381] (rows=59 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_380] (rows=35 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=35 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_307] (rows=22352 width=3) + Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col3 + Select Operator [SEL_62] (rows=1550375 width=13) + Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_302] (rows=1550375 width=13) + Conds:RS_59._col1=RS_350._col0(Inner),Output:["_col2","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_350] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_347] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_301] (rows=7751872 width=98) + Conds:RS_379._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_379] + PartitionCols:_col0 + Select Operator [SEL_378] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_377] (rows=285117733 width=123) + predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_60_customer_address_ca_address_sk_min) AND DynamicValue(RS_60_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_60_customer_address_ca_address_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_47] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_374] + Group By Operator [GBY_373] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_339] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_335] (rows=50 width=4) + Output:["_col0"] + Please refer to 
the previous Select Operator [SEL_331] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_376] + Group By Operator [GBY_375] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_358] + Group By Operator [GBY_355] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_351] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out new file mode 100644 index 0000000000..2f2137f7d4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out @@ -0,0 +1,205 @@ +PREHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson 
County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### 
+Plan optimized by CBO. + +Vertex dependency in root stage +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_135] + Select Operator [SEL_134] (rows=276068 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_35] + Select Operator [SEL_34] (rows=276068 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_100] (rows=276068 width=364) + Conds:RS_102._col0=RS_133._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=80000000 width=356) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_0] (rows=80000000 width=356) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + PartitionCols:_col1 + Filter Operator [FIL_132] (rows=276068 width=12) + predicate:_col2 BETWEEN 15 AND 20 + Select Operator [SEL_131] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_130] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0, _col1 + Group By Operator 
[GBY_25] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_99] (rows=5521356 width=4) + Conds:RS_21._col1=RS_121._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0 + Select Operator [SEL_120] (rows=480 width=4) + Output:["_col0"] + Filter Operator [FIL_119] (rows=480 width=104) + predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END) + TableScan [TS_18] (rows=7200 width=104) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=82820326 width=9) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_98] (rows=82820326 width=9) + Conds:RS_14._col3=RS_113._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=112 width=4) + Output:["_col0"] + Filter Operator [FIL_111] (rows=112 width=102) + predicate:(s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') + TableScan [TS_8] (rows=1704 width=102) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_97] (rows=156119211 width=14) + Conds:RS_129._col0=RS_105._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=595 width=4) + Output:["_col0"] + Filter Operator 
[FIL_103] (rows=595 width=12) + predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002)) + TableScan [TS_5] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=479121995 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_127] (rows=479121995 width=19) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_22_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_22_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_22_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=19) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_106] (rows=595 
width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_104] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=112 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_112] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_122] (rows=480 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_120] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out new file mode 100644 index 0000000000..2501199e89 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out @@ -0,0 +1,361 @@ +PREHOOK: query: explain +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + 
max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 
1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 23 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) +Map 24 <- Reducer 22 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 15 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 21 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 
(SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_232] + Limit [LIM_231] (rows=1 width=352) + Number of rows:100 + Select Operator [SEL_230] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + Select Operator [SEL_228] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_227] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_64] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 + Top N Key Operator [TNK_102] (rows=67 width=276) + keys:_col4, _col6, _col7, _col8, _col9, _col10,sort order:++++++,top n:100 + Select Operator [SEL_63] (rows=67 width=276) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_62] (rows=67 width=276) + predicate:(_col12 is not null or _col14 is not null) + Merge Join 
Operator [MERGEJOIN_180] (rows=67 width=276) + Conds:RS_59._col0=RS_226._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_59] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_179] (rows=68 width=276) + Conds:RS_56._col0=RS_216._col0(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_56] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_178] (rows=162346 width=272) + Conds:RS_53._col0=RS_54._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Group By Operator [GBY_52] (rows=168231 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_16] (rows=62428523 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=62428523 width=2) + Conds:RS_206._col0=RS_190._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=217 width=4) + Output:["_col0"] + Filter Operator [FIL_188] (rows=217 width=12) + predicate:((d_qoy < 4) and (d_year = 1999)) + TableScan [TS_10] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_206] + PartitionCols:_col0 + Select Operator [SEL_205] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_204] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_14_date_dim_d_date_sk_min) AND DynamicValue(RS_14_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_14_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_7] (rows=575995635 width=7) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_199] + Group By Operator [GBY_196] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_191] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=78293105 width=272) + Conds:RS_48._col1=RS_187._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=1861800 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_5] (rows=1861800 width=186) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_173] (rows=77201384 width=93) + Conds:RS_183._col2=RS_185._col0(Inner),Output:["_col0","_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col2 + Select Operator [SEL_182] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + 
default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=40000000 width=90) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=168231 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_214] (rows=168231 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=168231 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=17104380 width=3) + Conds:RS_213._col0=RS_192._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_192] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_189] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_211] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_56_c_c_customer_sk_min) AND DynamicValue(RS_56_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_56_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_17] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_210] + Group 
By Operator [GBY_209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_150] + Group By Operator [GBY_149] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_148] (rows=162346 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_178] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_208] + Group By Operator [GBY_207] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_193] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 21 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_226] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=167041 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_224] (rows=167041 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_41] (rows=167041 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=33642830 width=3) + Conds:RS_223._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_189] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_222] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter 
Operator [FIL_221] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_59_c_c_customer_sk_min) AND DynamicValue(RS_59_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_59_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_31] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_165] + Group By Operator [GBY_164] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_163] (rows=68 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out new file mode 100644 index 0000000000..98673a3c52 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out @@ -0,0 +1,184 @@ +PREHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 
then i_category end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_113] + Limit [LIM_112] (rows=100 width=490) + Number of rows:100 + Select Operator [SEL_111] (rows=102 width=490) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + Select Operator [SEL_109] (rows=102 width=490) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_108] (rows=102 width=414) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col2 / _col3) ASC NULLS FIRST","partition by:":"(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] + Select Operator [SEL_107] (rows=102 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] + PartitionCols:(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END + Select Operator [SEL_105] (rows=102 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_104] (rows=102 width=414) + 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_23] (rows=2856 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1, 0L + Select Operator [SEL_21] (rows=30601888 width=232) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_82] (rows=30601888 width=232) + Conds:RS_18._col0=RS_93._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] + PartitionCols:_col0 + Select Operator [SEL_92] (rows=278 width=4) + Output:["_col0"] + Filter Operator [FIL_91] (rows=278 width=90) + predicate:(s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=187574154 width=381) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_81] (rows=187574154 width=381) + Conds:RS_11._col1=RS_103._col0(Inner),Output:["_col2","_col3","_col4","_col7","_col8"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] + PartitionCols:_col0 + Select Operator [SEL_102] (rows=462000 width=186) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_80] (rows=187574154 width=203) + Conds:RS_101._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_85] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=652 width=4) + Output:["_col0"] 
+ Filter Operator [FIL_83] (rows=652 width=8) + predicate:(d_year = 1999) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=525329897 width=225) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_99] (rows=525329897 width=225) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_9_d1_d_date_sk_min) AND DynamicValue(RS_9_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=225) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + Group By Operator [GBY_95] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_94] (rows=278 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_92] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE 
[RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_86] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_84] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out new file mode 100644 index 0000000000..381c0436a5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory 
+POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_96] + Limit [LIM_95] (rows=100 width=396) + Number of rows:100 + Select Operator [SEL_94] (rows=2871 width=396) + Output:["_col0","_col1","_col2"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] + Group By Operator [GBY_92] (rows=2871 width=396) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_22] (rows=2871 width=396) + Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 + Top N Key Operator [TNK_43] (rows=1781971 width=396) + keys:_col2, _col3, _col4,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_77] (rows=1781971 width=396) + Conds:RS_91._col0=RS_19._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 7 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=2871 width=400) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_76] (rows=2871 width=400) + Conds:RS_14._col1=RS_86._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=297 width=400) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_84] (rows=297 width=404) + predicate:((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52) + TableScan [TS_8] (rows=462000 width=403) 
+ default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_75] (rows=463969 width=4) + Conds:RS_80._col0=RS_83._col0(Inner),Output:["_col1"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_80] + PartitionCols:_col0 + Select Operator [SEL_79] (rows=4176000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_78] (rows=4176000 width=11) + predicate:inv_quantity_on_hand BETWEEN 100 AND 500 + TableScan [TS_2] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_81] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' + TableScan [TS_5] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=287989836 width=4) + Output:["_col0"] + Filter Operator [FIL_89] (rows=287989836 width=4) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) + TableScan [TS_0] (rows=287989836 width=4) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_46] + 
Group By Operator [GBY_45] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_44] (rows=2871 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_17] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out new file mode 100644 index 0000000000..bc22cfb07e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out @@ -0,0 +1,261 @@ +PREHOOK: query: explain +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 
1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 13 (BROADCAST_EDGE) +Map 20 <- Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_232] + Limit [LIM_231] (rows=1 width=8) + Number of rows:100 + Group By Operator [GBY_230] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + 
<-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_228] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_227] (rows=1 width=8) + Filter Operator [FIL_226] (rows=1 width=8) + predicate:(_col3 = 3L) + Select Operator [SEL_225] (rows=165330890 width=8) + Output:["_col3"] + Group By Operator [GBY_224] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] vectorized + Reduce Output Operator [RS_242] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_241] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_240] (rows=49146883 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_239] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_238] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_39] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_175] (rows=49146883 width=274) + Conds:RS_35._col1=RS_217._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=80000000 width=184) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_174] (rows=49146883 width=97) + 
Conds:RS_237._col0=RS_200._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + PartitionCols:_col0 + Select Operator [SEL_197] (rows=317 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_196] (rows=317 width=102) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col0 + Select Operator [SEL_236] (rows=285117831 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_235] (rows=285117831 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_24] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] + Group By Operator [GBY_205] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_201] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] + <-Reducer 16 [CONTAINS] vectorized + Reduce Output Operator [RS_252] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_251] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, 
_col2 + Group By Operator [GBY_250] (rows=24986582 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_249] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_248] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_64] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_177] (rows=24986582 width=274) + Conds:RS_60._col1=RS_218._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_215] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=24986582 width=97) + Conds:RS_247._col0=RS_202._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_197] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_247] + PartitionCols:_col0 + Select Operator [SEL_246] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_245] (rows=143930993 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_49] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_209] + Group By Operator [GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_203] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_223] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_222] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_221] (rows=91197425 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_220] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_219] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_173] (rows=91197425 width=274) + Conds:RS_11._col1=RS_216._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_215] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_172] (rows=91197425 width=96) + Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_197] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE 
[RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_212] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out new file mode 100644 index 0000000000..812928b32c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + 
group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_166] + Select Operator [SEL_165] (rows=859 width=56) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] + Select Operator [SEL_43] (rows=859 width=48) + Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=859 width=40) + Conds:RS_160._col0, _col1=RS_164._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_160] + PartitionCols:_col0, _col1 + Select Operator [SEL_159] (rows=859 width=24) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_158] (rows=859 width=40) + predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END + Group By Operator [GBY_157] (rows=1719 width=40) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=1719 width=40) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2 + Select 
Operator [SEL_13] (rows=1032514 width=8) + Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_141] (rows=1032514 width=8) + Conds:RS_10._col1=RS_155._col0(Inner),Output:["_col2","_col3","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=27 width=4) + Output:["_col0"] + TableScan [TS_5] (rows=27 width=4) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_140] (rows=1032514 width=8) + Conds:RS_146._col0=RS_152._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + PartitionCols:_col0 + Select Operator [SEL_145] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_148] (rows=50 width=12) + predicate:((d_moy = 5) and (d_year = 1999)) + TableScan [TS_2] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_164] + PartitionCols:_col0, _col1 + Select Operator [SEL_163] (rows=859 width=24) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_162] (rows=859 width=40) + predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END + Group By Operator [GBY_161] (rows=1719 width=40) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=1719 width=40) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2 + Select Operator [SEL_33] (rows=1032514 width=8) + Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_143] (rows=1032514 width=8) + Conds:RS_30._col1=RS_156._col0(Inner),Output:["_col2","_col3","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_154] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_142] (rows=1032514 width=8) + Conds:RS_147._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_145] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_149] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 1999)) + Please refer to the previous TableScan [TS_2] + +PREHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else 
stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out new file mode 100644 index 0000000000..9b81cd0ba2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out @@ -0,0 +1,627 @@ +PREHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + 
,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: 
Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from 
customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: 
default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 34 <- Reducer 24 (BROADCAST_EDGE) +Map 35 <- Reducer 28 (BROADCAST_EDGE) +Map 36 <- Reducer 19 (BROADCAST_EDGE) +Map 37 <- Reducer 32 (BROADCAST_EDGE) +Map 38 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 33 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE) +Reducer 13 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 36 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 15 <- Map 33 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (ONE_TO_ONE_EDGE) +Reducer 18 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 19 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 34 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 21 <- Map 33 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 24 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 35 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 26 <- Map 33 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 37 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 33 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 33 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 38 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 
vectorized + File Output Operator [FS_546] + Limit [LIM_545] (rows=100 width=85) + Number of rows:100 + Select Operator [SEL_544] (rows=7323197 width=85) + Output:["_col0"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=7323197 width=85) + Output:["_col0"] + Filter Operator [FIL_135] (rows=7323197 width=433) + predicate:CASE WHEN (_col5) THEN (CASE WHEN (_col6) THEN ((_col7 > (_col1 / _col4))) ELSE ((null > (_col1 / _col4))) END) ELSE (_col8) END + Merge Join Operator [MERGEJOIN_462] (rows=14646395 width=433) + Conds:RS_507._col0=RS_133._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 12 [ONE_TO_ONE_EDGE] + FORWARD [RS_133] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=12248093 width=421) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_130] (rows=12248093 width=649) + predicate:CASE WHEN (_col4) THEN (CASE WHEN (_col3) THEN (((_col11 / _col2) > _col5)) ELSE (_col6) END) ELSE (CASE WHEN (_col3) THEN (((_col11 / _col2) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_461] (rows=24496186 width=649) + Conds:RS_127._col0=RS_513._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col11"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_513] + PartitionCols:_col0 + Select Operator [SEL_512] (rows=80000000 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_511] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_121] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_120] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, 
_col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_457] (rows=101084444 width=764) + Conds:RS_116._col1=RS_500._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_500] + PartitionCols:_col0 + Select Operator [SEL_498] (rows=80000000 width=656) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + TableScan [TS_6] (rows=80000000 width=656) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_116] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_456] (rows=101084444 width=115) + Conds:RS_113._col0=RS_473._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_473] + PartitionCols:_col0 + Select Operator [SEL_467] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_463] (rows=652 width=8) + predicate:(d_year = 2002) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 38 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col0 + Select Operator [SEL_107] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_246] (rows=285117831 width=453) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_114_date_dim_d_date_sk_min) AND DynamicValue(RS_114_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_114_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_105] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST 
[RS_509] + Group By Operator [GBY_508] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_490] + Group By Operator [GBY_484] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_474] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_467] + <-Reducer 18 [ONE_TO_ONE_EDGE] + FORWARD [RS_127] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_460] (rows=20485011 width=537) + Conds:RS_124._col0=RS_543._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] + <-Reducer 17 [ONE_TO_ONE_EDGE] + FORWARD [RS_124] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=31888273 width=421) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_459] (rows=31888273 width=529) + Conds:RS_81._col0=RS_519._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_519] + PartitionCols:_col0 + Select Operator [SEL_518] (rows=80000000 width=297) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_517] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_74] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_453] (rows=187573258 width=764) + 
Conds:RS_70._col1=RS_501._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_501] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_498] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_452] (rows=187573258 width=115) + Conds:RS_67._col0=RS_475._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_475] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_467] + <-Map 36 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0 + Select Operator [SEL_61] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_240] (rows=525327388 width=435) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_59] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_515] + Group By Operator [GBY_514] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_491] + Group By Operator [GBY_485] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_476] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_467] + <-Reducer 23 [ONE_TO_ONE_EDGE] + FORWARD [RS_81] + PartitionCols:_col0 + Merge 
Join Operator [MERGEJOIN_458] (rows=26666666 width=332) + Conds:RS_527._col0=RS_535._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_527] + PartitionCols:_col0 + Select Operator [SEL_526] (rows=26666666 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_525] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_524] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_523] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_34] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_449] (rows=187573258 width=764) + Conds:RS_30._col1=RS_502._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_502] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_498] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_448] (rows=187573258 width=115) + Conds:RS_27._col0=RS_477._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_477] + PartitionCols:_col0 + Select Operator [SEL_468] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_464] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_3] + <-Map 34 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_21] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator 
[FIL_234] (rows=525327388 width=435) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_19] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_521] + Group By Operator [GBY_520] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_492] + Group By Operator [GBY_486] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_478] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_468] + <-Reducer 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_535] + PartitionCols:_col0 + Select Operator [SEL_534] (rows=26666666 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_533] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_532] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_531] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_54] (rows=80000000 width=764) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_451] (rows=101084444 width=764) + Conds:RS_50._col1=RS_503._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_503] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_498] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_450] (rows=101084444 width=115) + Conds:RS_47._col0=RS_479._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_479] + PartitionCols:_col0 + Select Operator [SEL_469] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_465] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_3] + <-Map 35 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_237] (rows=285117831 width=453) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_39] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_529] + Group By Operator [GBY_528] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_493] + Group 
By Operator [GBY_487] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_480] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_469] + <-Reducer 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_543] + PartitionCols:_col0 + Select Operator [SEL_542] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_541] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_540] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_539] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_100] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_455] (rows=51391963 width=764) + Conds:RS_96._col1=RS_504._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_504] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_498] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_454] (rows=51391963 width=115) + Conds:RS_93._col0=RS_481._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_481] + PartitionCols:_col0 + Select Operator [SEL_470] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_466] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_3] + <-Map 37 [SIMPLE_EDGE] + SHUFFLE 
[RS_93] + PartitionCols:_col0 + Select Operator [SEL_87] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_243] (rows=143930993 width=455) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_85] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_537] + Group By Operator [GBY_536] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_494] + Group By Operator [GBY_488] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_482] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_470] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_507] + PartitionCols:_col0 + Select Operator [SEL_506] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_505] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_15] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, 
_col6, _col7, _col8, _col9, _col10, _col11 + Merge Join Operator [MERGEJOIN_447] (rows=51391963 width=764) + Conds:RS_11._col1=RS_499._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_499] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_498] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_446] (rows=51391963 width=115) + Conds:RS_8._col0=RS_471._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_471] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_467] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_8] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_231] (rows=143930993 width=455) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_496] + Group By Operator [GBY_495] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_489] + Group By Operator [GBY_483] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_472] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous 
Select Operator [SEL_467] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out new file mode 100644 index 0000000000..0a69c9d709 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out @@ -0,0 +1,185 @@ +PREHOOK: query: explain +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + 
and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_128] + Limit [LIM_127] (rows=100 width=410) + Number of rows:100 + Select Operator [SEL_126] (rows=51333 width=410) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=51333 width=410) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0, _col1 + Group By Operator [GBY_28] (rows=359331 width=410) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 + Top N Key Operator [TNK_54] (rows=5757278 width=306) + keys:_col0, _col1,sort order:++,top n:100 + Select Operator [SEL_26] (rows=5757278 width=306) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator 
[MERGEJOIN_100] (rows=5757278 width=306) + Conds:RS_23._col1=RS_111._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] + PartitionCols:_col0 + Select Operator [SEL_110] (rows=51333 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_109] (rows=51333 width=215) + predicate:i_current_price BETWEEN 0.99 AND 1.49 + TableScan [TS_17] (rows=462000 width=215) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_99] (rows=51815831 width=210) + Conds:RS_20._col0=RS_103._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] + PartitionCols:_col0 + Select Operator [SEL_102] (rows=8116 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_101] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' + TableScan [TS_14] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=466374405 width=205) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_98] (rows=466374405 width=249) + Conds:RS_10._col1=RS_123._col0(Inner),Output:["_col0","_col2","_col4","_col7","_col9"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=27 width=90) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=27 width=90) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_97] (rows=466374405 width=167) + Conds:RS_119._col2, _col3=RS_121._col0, _col1(Left 
Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col2, _col3 + Select Operator [SEL_118] (rows=285115816 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_117] (rows=285115816 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] + Group By Operator [GBY_105] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_104] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_102] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_112] (rows=51333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_110] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0, _col1 + Select Operator [SEL_120] (rows=28798881 width=117) + Output:["_col0","_col1","_col2"] + TableScan [TS_3] (rows=28798881 width=117) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query42.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query42.q.out new file mode 100644 index 0000000000..02b9f7c074 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query42.q.out @@ -0,0 +1,139 @@ +PREHOOK: query: explain +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + 
,item.i_category_id + ,item.i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] + Limit [LIM_78] (rows=100 width=210) + Number of rows:100 + Select Operator [SEL_77] (rows=110 width=210) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] + Select Operator [SEL_75] (rows=110 width=318) + Output:["_col0","_col1","_col3"] + Group By Operator [GBY_74] (rows=110 width=206) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1 + Group By Operator [GBY_16] (rows=120 width=206) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col5, _col6 + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=94) + Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=7333 width=97) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_63] (rows=7333 width=101) + predicate:(i_manager_id = 1) + TableScan [TS_6] (rows=462000 width=101) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category_id","i_category","i_manager_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator 
[MERGEJOIN_53] (rows=15062131 width=4) + Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_55] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 1998)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_71] (rows=550076554 width=114) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_dt_d_date_sk_min) AND DynamicValue(RS_10_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_dt_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_62] + Group By Operator [GBY_61] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_60] + Group By Operator [GBY_59] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_58] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_56] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_70] + Group By Operator [GBY_69] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_66] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_64] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out new file mode 100644 index 0000000000..8183690105 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain +select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select 
s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] + Limit [LIM_78] (rows=100 width=972) + Number of rows:100 + Select Operator [SEL_77] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + Group By Operator [GBY_17] (rows=142538 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col10, _col9 + Top N Key Operator [TNK_33] (rows=37536846 width=257) + keys:_col10, _col9,sort order:++,top n:100 + Select Operator [SEL_15] (rows=37536846 width=257) + Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_55] (rows=37536846 width=257) + Conds:RS_12._col1=RS_66._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] + PartitionCols:_col0 + Select Operator [SEL_65] (rows=341 width=192) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_64] 
(rows=341 width=303) + predicate:(s_gmt_offset = -6) + TableScan [TS_6] (rows=1704 width=303) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_54] (rows=187574154 width=129) + Conds:RS_74._col0=RS_58._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=652 width=32) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_56] (rows=652 width=99) + predicate:(d_year = 1998) + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] + PartitionCols:_col0 + Select Operator [SEL_73] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_72] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_13_store_s_store_sk_min) AND DynamicValue(RS_13_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_13_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_63] + Group By Operator [GBY_62] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + 
<-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_61] + Group By Operator [GBY_60] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_59] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_57] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_67] (rows=341 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_65] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out new file mode 100644 index 0000000000..13b093625c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -0,0 +1,193 @@ +Warning: Shuffle Join MERGEJOIN[101][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col 
desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Reducer 8 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_135] + Limit [LIM_134] (rows=100 width=218) + Number of rows:100 + Select Operator [SEL_133] (rows=6951 width=218) + Output:["_col0","_col1","_col2"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_67] + Select Operator [SEL_66] (rows=6951 width=218) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_105] (rows=6951 width=218) + Conds:RS_63._col3=RS_64._col3(Inner),Output:["_col1","_col3","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_102] (rows=6951 width=111) + Conds:RS_107._col0=RS_127._col0(Inner),Output:["_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=462000 width=111) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=111) + default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col0 + Select Operator [SEL_126] (rows=6951 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_125] (rows=6951 width=116) + predicate:(rank_window_0 < 11) + PTF Operator [PTF_124] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_123] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_21] + 
PartitionCols:0 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_101] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] + Select Operator [SEL_121] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_120] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col0 + Group By Operator [GBY_118] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_117] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_116] (rows=287946 width=114) + predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) + TableScan [TS_9] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_115] + Select Operator [SEL_114] (rows=62562 width=116) + Output:["_col0","_col1"] + Group By Operator [GBY_113] (rows=62562 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Group By Operator [GBY_111] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_110] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_109] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_2] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_64] + 
PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_104] (rows=6951 width=111) + Conds:RS_108._col0=RS_132._col0(Inner),Output:["_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_106] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=6951 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_130] (rows=6951 width=116) + predicate:(rank_window_0 < 11) + PTF Operator [PTF_129] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_128] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:0 + Please refer to the previous Filter Operator [FIL_20] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out new file mode 100644 index 0000000000..bf620c8ed5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out @@ -0,0 +1,183 @@ +PREHOOK: query: explain +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: 
default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ca_zip, ca_county, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_county + order by ca_zip, ca_county + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 7 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) +Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_149] + Limit [LIM_148] (rows=100 width=299) + Number of rows:100 + Select Operator [SEL_147] (rows=285780 width=299) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=285780 width=299) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + 
PartitionCols:_col0, _col1 + Group By Operator [GBY_40] (rows=3715140 width=299) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 + Top N Key Operator [TNK_69] (rows=10246864 width=302) + keys:_col8, _col7,sort order:++,top n:100 + Select Operator [SEL_39] (rows=10246864 width=302) + Output:["_col3","_col7","_col8"] + Filter Operator [FIL_38] (rows=10246864 width=302) + predicate:((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or _col15 is not null) + Select Operator [SEL_37] (rows=10246864 width=302) + Output:["_col3","_col7","_col8","_col15"] + Merge Join Operator [MERGEJOIN_119] (rows=10246864 width=302) + Conds:RS_34._col0=RS_35._col6(Inner),Output:["_col3","_col4","_col8","_col12"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_115] (rows=80000000 width=191) + Conds:RS_122._col1=RS_124._col0(Inner),Output:["_col0","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] + PartitionCols:_col1 + Select Operator [SEL_121] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_120] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=40000000 width=191) + Output:["_col0","_col1","_col2"] + TableScan [TS_3] (rows=40000000 width=191) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_118] (rows=10246864 width=119) + Conds:RS_27._col0=RS_28._col1(Inner),Output:["_col3","_col6","_col7"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1 + Merge Join 
Operator [MERGEJOIN_117] (rows=10246864 width=119) + Conds:RS_144._col0=RS_136._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_134] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000)) + TableScan [TS_17] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=143930993 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_142] (rows=143930993 width=123) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_14] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_137] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_135] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_116] (rows=462007 width=4) + 
Conds:RS_127._col1=RS_133._col0(Left Outer),Output:["_col0","_col3"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col1 + Select Operator [SEL_125] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_133] + PartitionCols:_col0 + Select Operator [SEL_132] (rows=5 width=104) + Output:["_col0","_col1"] + Group By Operator [GBY_131] (rows=5 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] + PartitionCols:_col0 + Group By Operator [GBY_129] (rows=5 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_128] (rows=11 width=104) + Output:["i_item_id"] + Filter Operator [FIL_126] (rows=11 width=104) + predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + Please refer to the previous TableScan [TS_5] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out new file mode 100644 index 0000000000..c15144fb95 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out @@ -0,0 +1,242 @@ +PREHOOK: query: explain +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year 
in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood','Union','Salem','Highland Park') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address 
+POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 8 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_183] + Limit [LIM_182] (rows=100 width=594) + Number of rows:100 + Select Operator [SEL_181] (rows=20351707 width=594) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=20351707 width=594) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_43] (rows=20351707 width=594) + predicate:(_col5 <> _col8) + Merge Join Operator [MERGEJOIN_144] (rows=20351707 width=594) + Conds:RS_40._col0=RS_180._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_139] (rows=80000000 width=277) + Conds:RS_147._col1=RS_149._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=40000000 width=97) + Output:["_col0","_col1"] + 
TableScan [TS_3] (rows=40000000 width=97) + default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col1 + Select Operator [SEL_146] (rows=80000000 width=188) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_145] (rows=80000000 width=188) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=188) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] + PartitionCols:_col1 + Select Operator [SEL_179] (rows=20351707 width=321) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_178] (rows=20351707 width=321) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_33] (rows=20351707 width=321) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col8, _col2, _col3 + Merge Join Operator [MERGEJOIN_143] (rows=20351707 width=97) + Conds:RS_29._col2=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col8"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_148] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_142] (rows=20351707 width=4) + Conds:RS_26._col1=RS_169._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_169] + PartitionCols:_col0 + Select Operator [SEL_168] (rows=1855 width=4) + Output:["_col0"] + Filter Operator [FIL_167] (rows=1855 width=12) + 
predicate:((hd_dep_count = 2) or (hd_vehicle_count = 1)) + TableScan [TS_21] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col1 + Select Operator [SEL_20] (rows=78993142 width=178) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_141] (rows=78993142 width=178) + Conds:RS_17._col4=RS_161._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=85 width=4) + Output:["_col0"] + Filter Operator [FIL_159] (rows=85 width=97) + predicate:(s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') + TableScan [TS_11] (rows=1704 width=97) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_140] (rows=196204013 width=218) + Conds:RS_177._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_152] (rows=783 width=4) + Output:["_col0"] + Filter Operator [FIL_151] (rows=783 width=12) + predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000)) + TableScan [TS_8] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_177] + PartitionCols:_col0 + Select Operator [SEL_176] (rows=457565061 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_175] (rows=457565061 width=237) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_27_household_demographics_hd_demo_sk_min) AND 
DynamicValue(RS_27_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_27_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=237) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_154] (rows=783 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_152] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_162] (rows=85 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_160] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_174] + Group By Operator [GBY_173] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_170] (rows=1855 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_168] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out new file mode 100644 index 0000000000..104c0bbd3d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out @@ -0,0 +1,268 @@ +PREHOOK: query: explain +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, 
v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = 
v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Reducer 5 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_324] + Limit [LIM_323] (rows=100 width=658) + Number of rows:100 + Select Operator [SEL_322] (rows=241454 width=658) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_113] + Select Operator [SEL_112] (rows=241454 width=658) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_281] (rows=241454 width=546) + Conds:RS_109._col6, _col7, _col8, _col9, _col14=RS_309._col0, _col1, _col2, _col3, _col5(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] + 
<-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0, _col1, _col2, _col3, _col5 + Select Operator [SEL_307] (rows=162257387 width=485) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_305] (rows=162257387 width=489) + predicate:rank_window_0 is not null + PTF Operator [PTF_303] (rows=162257387 width=489) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] + Select Operator [SEL_302] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col1, _col0, _col4, _col5 + Group By Operator [GBY_299] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_95] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col0, _col1, _col4, _col5, _col7, _col8 + Merge Join Operator [MERGEJOIN_279] (rows=162257387 width=472) + Conds:RS_91._col2=RS_298._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col0 + Select Operator [SEL_297] (rows=1704 width=183) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_296] (rows=1704 width=183) + predicate:(s_company_name is not null and s_store_name is not null) + TableScan [TS_88] (rows=1704 width=183) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col2 + Select Operator [SEL_87] (rows=162257387 width=297) 
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_278] (rows=162257387 width=297) + Conds:RS_84._col1=RS_295._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_294] (rows=462000 width=194) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_293] (rows=462000 width=194) + predicate:(i_brand is not null and i_category is not null) + TableScan [TS_78] (rows=462000 width=194) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_277] (rows=162257387 width=111) + Conds:RS_292._col0=RS_284._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=564 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_282] (rows=564 width=12) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001)) + TableScan [TS_75] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_291] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_290] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_82_date_dim_d_date_sk_min) AND DynamicValue(RS_82_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_82_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_72] (rows=575995635 width=118) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + Group By Operator [GBY_286] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_285] (rows=564 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_283] + <-Reducer 9 [ONE_TO_ONE_EDGE] + FORWARD [RS_109] + PartitionCols:_col6, _col7, _col8, _col9, _col14 + Merge Join Operator [MERGEJOIN_280] (rows=241454 width=717) + Conds:RS_310._col0, _col1, _col2, _col3, _col5=RS_321._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] + PartitionCols:_col0, _col1, _col2, _col3, _col5 + Select Operator [SEL_308] (rows=162257387 width=485) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_306] (rows=162257387 width=489) + predicate:rank_window_0 is not null + PTF Operator [PTF_304] (rows=162257387 width=489) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + PartitionCols:_col0, _col1, _col2, _col3, _col8 + Select Operator [SEL_320] (rows=241454 width=605) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_319] (rows=241454 width=605) + predicate:CASE WHEN 
((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END + Select Operator [SEL_318] (rows=482909 width=601) + Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_317] (rows=482909 width=601) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) + PTF Operator [PTF_316] (rows=162257387 width=601) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5, _col6"}] + Select Operator [SEL_315] (rows=162257387 width=601) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col1, _col0, _col4, _col5 + Select Operator [SEL_313] (rows=162257387 width=489) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_312] (rows=162257387 width=489) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col5, _col2"}] + Select Operator [SEL_311] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + PartitionCols:_col1, _col0, _col4, _col5, _col2 + Please refer to the previous Group By Operator [GBY_299] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out new file mode 100644 index 0000000000..beb328d62a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out @@ -0,0 +1,252 @@ +PREHOOK: query: explain +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and 
ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 1998 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) 
+ and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 25000 + ) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_103] + Group By Operator [GBY_102] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(_col2)"] + Merge Join Operator [MERGEJOIN_74] (rows=20249 width=0) + Conds:RS_77._col0=RS_21._col0(Inner),Output:["_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_75] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_0] (rows=73049 width=8) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=56709 width=0) + Output:["_col0","_col1"] + Filter Operator [FIL_18] (rows=56709 width=24) + predicate:((_col10 and _col6) or (_col11 and _col7) or (_col9 and _col5)) + Merge Join Operator [MERGEJOIN_73] (rows=75613 width=24) + Conds:RS_15._col3=RS_91._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_89] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) + TableScan [TS_9] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_72] (rows=856943 width=12) + Conds:RS_83._col0=RS_101._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=29552 width=4) + Output:["_col0"] + Filter Operator [FIL_81] (rows=29552 width=183) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M')) + TableScan [TS_3] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_101] + PartitionCols:_col1 + Select Operator [SEL_100] (rows=53235296 width=27) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_99] (rows=53235296 width=233) + 
predicate:((ss_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_12_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_12_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_12_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_96] + Group By Operator [GBY_95] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3529412)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3529412)"] + Select Operator [SEL_92] (rows=3529412 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_90] + <-Reducer 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_80] + Group By Operator [GBY_79] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_78] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_76] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_86] + Group By Operator [GBY_85] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_84] (rows=29552 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_82] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out new file mode 100644 index 0000000000..6e6a17c7ef --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: explain +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + 
cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as 
dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + 
) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and 
sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 13 (BROADCAST_EDGE) +Map 27 <- Reducer 19 (BROADCAST_EDGE) +Map 29 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Union 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 15 <- Map 28 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 12 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 21 <- Map 30 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE), Union 9 (CONTAINS) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 11 vectorized + File Output Operator [FS_310] + Limit [LIM_309] (rows=100 width=215) + Number of rows:100 + Select Operator [SEL_308] (rows=3418 
width=215) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + Select Operator [SEL_306] (rows=3418 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_305] (rows=3418 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Union 9 [SIMPLE_EDGE] + <-Reducer 24 [CONTAINS] vectorized + Reduce Output Operator [RS_351] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_350] (rows=3418 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 + Top N Key Operator [TNK_349] (rows=3418 width=214) + keys:_col0, _col3, _col4, _col1, _col2,sort order:+++++,top n:100 + Select Operator [SEL_348] (rows=1142 width=213) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_347] (rows=1142 width=248) + predicate:((_col0 <= 10) or (rank_window_1 <= 10)) + PTF Operator [PTF_346] (rows=1714 width=248) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_345] (rows=1714 width=248) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] + PartitionCols:0 + Select Operator [SEL_343] (rows=1714 width=244) + Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] + PTF Operator [PTF_342] (rows=1714 width=244) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_341] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] + PartitionCols:0 + Group By Operator [GBY_339] (rows=1714 width=244) + 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col0 + Group By Operator [GBY_88] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)"],keys:_col1 + Select Operator [SEL_86] (rows=1673571 width=73) + Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_237] (rows=1673571 width=73) + Conds:RS_83._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] + PartitionCols:_col0, _col1 + Select Operator [SEL_337] (rows=19197050 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_336] (rows=19197050 width=119) + predicate:(sr_return_amt > 10000) + TableScan [TS_77] (rows=57591150 width=119) + default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_236] (rows=1673571 width=8) + Conds:RS_335._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_272] + PartitionCols:_col0 + Select Operator [SEL_267] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_266] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 2000)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_335] + PartitionCols:_col0 + Select Operator [SEL_334] (rows=61119617 width=229) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_333] (rows=61119617 width=229) + predicate:((ss_net_paid > 0) and 
(ss_net_profit > 1) and (ss_quantity > 0) and (ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_71] (rows=575995635 width=229) + default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_332] + Group By Operator [GBY_331] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_279] + Group By Operator [GBY_276] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_273] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_267] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_304] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_303] (rows=3418 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 + Top N Key Operator [TNK_302] (rows=3418 width=214) + keys:_col0, _col3, _col4, _col1, _col2,sort order:+++++,top n:100 + Select Operator [SEL_301] (rows=2276 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_300] (rows=2276 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 18 [CONTAINS] vectorized + Reduce Output Operator [RS_330] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_329] (rows=2276 width=215) + 
Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 + Select Operator [SEL_328] (rows=1134 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_327] (rows=1134 width=248) + predicate:((_col0 <= 10) or (rank_window_1 <= 10)) + PTF Operator [PTF_326] (rows=1701 width=248) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_325] (rows=1701 width=248) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:0 + Select Operator [SEL_323] (rows=1701 width=244) + Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] + PTF Operator [PTF_322] (rows=1701 width=244) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_321] (rows=1701 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + PartitionCols:0 + Group By Operator [GBY_319] (rows=1701 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Group By Operator [GBY_49] (rows=1701 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)"],keys:_col1 + Select Operator [SEL_47] (rows=865646 width=188) + Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_235] (rows=865646 width=188) + Conds:RS_44._col1, _col2=RS_318._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] 
+ PartitionCols:_col0, _col1 + Select Operator [SEL_317] (rows=9599627 width=121) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_316] (rows=9599627 width=121) + predicate:(cr_return_amount > 10000) + TableScan [TS_38] (rows=28798881 width=121) + default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_234] (rows=865646 width=102) + Conds:RS_315._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_270] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_267] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] + PartitionCols:_col0 + Select Operator [SEL_314] (rows=31838858 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_313] (rows=31838858 width=239) + predicate:((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and (cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_32] (rows=287989836 width=239) + default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_278] + Group By Operator [GBY_275] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_271] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_267] + <-Reducer 6 [CONTAINS] vectorized + Reduce Output Operator [RS_299] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_298] (rows=2276 width=215) + Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 + Select Operator [SEL_297] (rows=1142 width=211) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_296] (rows=1142 width=248) + predicate:((_col0 <= 10) or (rank_window_1 <= 10)) + PTF Operator [PTF_295] (rows=1714 width=248) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_294] (rows=1714 width=248) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:0 + Select Operator [SEL_292] (rows=1714 width=244) + Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] + PTF Operator [PTF_291] (rows=1714 width=244) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_290] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:0 + Group By Operator [GBY_288] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Group By Operator [GBY_17] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)"],keys:_col1 + 
Select Operator [SEL_15] (rows=438010 width=177) + Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_233] (rows=438010 width=177) + Conds:RS_12._col1, _col2=RS_287._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + PartitionCols:_col0, _col1 + Select Operator [SEL_286] (rows=4799489 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_285] (rows=4799489 width=118) + predicate:(wr_return_amt > 10000) + TableScan [TS_6] (rows=14398467 width=118) + default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_232] (rows=438010 width=122) + Conds:RS_284._col0=RS_268._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_268] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_267] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=15996318 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_282] (rows=15996318 width=239) + predicate:((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=239) + default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_281] + Group By Operator [GBY_280] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_277] + Group By Operator [GBY_274] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_269] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_267] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out new file mode 100644 index 0000000000..32b0e3ec2a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out @@ -0,0 +1,531 @@ +PREHOOK: query: explain +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as 
decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 
'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + 
cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 14 days) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 21 <- Reducer 15 (BROADCAST_EDGE), Union 22 (CONTAINS) +Map 23 <- Union 22 (CONTAINS) +Map 25 <- Reducer 19 (BROADCAST_EDGE), Union 26 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE) +Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 10 (SIMPLE_EDGE), Union 26 (SIMPLE_EDGE) +Reducer 17 <- Map 30 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE), Union 26 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_300] + Limit [LIM_299] (rows=100 width=619) + Number of rows:100 + Select Operator [SEL_298] (rows=38846 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] + Select Operator [SEL_296] (rows=38846 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_295] (rows=38846 width=627) 
+ Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] vectorized + Reduce Output Operator [RS_310] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_309] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_308] (rows=39721 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_307] (rows=38846 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_306] (rows=38846 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Group By Operator [GBY_44] (rows=46000 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 + Merge Join Operator [MERGEJOIN_219] (rows=34813117 width=535) + Conds:RS_40._col0=RS_305._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + PartitionCols:_col0 + Select Operator [SEL_304] (rows=46000 width=104) + Output:["_col0","_col1"] + TableScan [TS_35] (rows=46000 width=104) + default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_218] (rows=34813117 width=438) + Conds:Union 22._col1=RS_273._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_273] + PartitionCols:_col0 + Select Operator [SEL_270] (rows=8116 width=4) + Output:["_col0"] + Filter Operator 
[FIL_269] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' + TableScan [TS_8] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Union 22 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] vectorized + Reduce Output Operator [RS_322] + PartitionCols:_col1 + Select Operator [SEL_321] (rows=285117694 width=455) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_320] (rows=285117694 width=231) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_250] (rows=287989836 width=231) + Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_281] + Group By Operator [GBY_278] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_274] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_270] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_325] + PartitionCols:_col1 + Select Operator [SEL_324] (rows=28221805 width=451) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_323] (rows=28221805 width=227) + predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) + TableScan [TS_255] (rows=28798881 width=227) + 
Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] + <-Reducer 18 [CONTAINS] vectorized + Reduce Output Operator [RS_317] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_316] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_315] (rows=39721 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_314] (rows=53 width=615) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_313] (rows=53 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=84 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 + Merge Join Operator [MERGEJOIN_221] (rows=30966059 width=543) + Conds:RS_72._col0=RS_312._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_311] (rows=84 width=104) + Output:["_col0","_col1"] + TableScan [TS_67] (rows=84 width=104) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_220] (rows=30966059 width=447) + Conds:Union 26._col1=RS_275._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_275] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_270] + <-Union 26 [SIMPLE_EDGE] + <-Map 25 [CONTAINS] vectorized + Reduce Output Operator [RS_330] + PartitionCols:_col1 + Select Operator [SEL_329] (rows=143930874 
width=455) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_328] (rows=143930874 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_260] (rows=144002668 width=231) + Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] + Group By Operator [GBY_279] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_276] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_270] + <-Reducer 28 [CONTAINS] + Reduce Output Operator [RS_268] + PartitionCols:_col1 + Select Operator [SEL_266] (rows=134782734 width=454) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_265] (rows=134782734 width=230) + Conds:RS_333._col0, _col2=RS_336._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0, _col2 + Select Operator [SEL_332] (rows=143966669 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_331] (rows=143966669 width=11) + predicate:ws_web_site_sk is not null + TableScan [TS_52] (rows=144002668 width=11) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_336] + 
PartitionCols:_col1, _col2 + Select Operator [SEL_335] (rows=13749816 width=225) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_334] (rows=13749816 width=225) + predicate:wr_returned_date_sk is not null + TableScan [TS_55] (rows=14398467 width=225) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_294] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_293] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_292] (rows=39721 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_291] (rows=822 width=617) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_290] (rows=822 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=1704 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 + Merge Join Operator [MERGEJOIN_217] (rows=64325014 width=376) + Conds:RS_16._col0=RS_289._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_11] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_216] (rows=64325014 width=277) + Conds:Union 
2._col1=RS_271._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_271] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_270] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + Reduce Output Operator [RS_287] + PartitionCols:_col1 + Select Operator [SEL_286] (rows=525329897 width=445) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_285] (rows=525329897 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_14_date_dim_d_date_sk_min) AND DynamicValue(RS_14_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_14_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_222] (rows=575995635 width=221) + Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_284] + Group By Operator [GBY_283] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_280] + Group By Operator [GBY_277] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_272] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_270] + <-Map 9 [CONTAINS] vectorized + Reduce Output Operator [RS_303] + PartitionCols:_col1 + Select Operator [SEL_302] (rows=53634860 width=447) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_301] (rows=53634860 width=223) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) + TableScan [TS_233] (rows=57591150 width=223) + Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out new file mode 100644 index 0000000000..1e76b60aee --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out @@ -0,0 +1,205 @@ +PREHOOK: query: explain +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: 
explain +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2000 +and d2.d_moy = 9 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_112] + Limit [LIM_111] (rows=100 width=858) + Number of rows:100 + Select Operator [SEL_110] (rows=3354624 width=857) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=3354624 width=857) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Group By Operator [GBY_22] (rows=11945214 width=857) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Top N Key Operator [TNK_43] (rows=11945214 width=837) + keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10,sort order:++++++++++,top n:100 + Merge Join Operator [MERGEJOIN_96] (rows=11945214 width=837) + Conds:RS_18._col0=RS_107._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + 
<-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_105] (rows=50 width=12) + predicate:((d_moy = 9) and (d_year = 2000)) + TableScan [TS_15] (rows=73049 width=12) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=478292911 width=841) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Merge Join Operator [MERGEJOIN_95] (rows=478292911 width=825) + Conds:RS_11._col3=RS_104._col0(Inner),Output:["_col0","_col5","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_104] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=1704 width=821) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + TableScan [TS_6] (rows=1704 width=821) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_94] (rows=478292911 width=11) + Conds:RS_99._col1, _col2, _col4=RS_102._col1, _col2, _col3(Inner),Output:["_col0","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_99] + PartitionCols:_col1, _col2, _col4 + Select Operator [SEL_98] (rows=501694138 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_97] (rows=501694138 width=19) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=19) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col1, _col2, _col3 + Select Operator [SEL_101] (rows=53632139 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_100] (rows=53632139 width=15) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_3] (rows=57591150 width=15) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out new file mode 100644 index 0000000000..98625598bb --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding 
and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store 
on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=636) + Number of rows:100 + Select Operator [SEL_115] (rows=363803676 width=636) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=363803676 width=636) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_112] (rows=363803676 width=420) + predicate:(max_window_0 > max_window_1) + PTF Operator [PTF_111] (rows=1091411029 width=420) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS LAST","partition by:":"CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END"}] + Select Operator [SEL_110] (rows=1091411029 width=420) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END + Merge Join Operator [MERGEJOIN_87] (rows=1091411029 
width=420) + Conds:RS_40._col0, _col1=RS_41._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0, _col1 + Select Operator [SEL_37] (rows=3442937 width=210) + Output:["_col0","_col1","_col2"] + PTF Operator [PTF_36] (rows=3442937 width=210) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] + Group By Operator [GBY_32] (rows=3442937 width=210) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=24992810 width=210) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 + Merge Join Operator [MERGEJOIN_86] (rows=24992810 width=209) + Conds:RS_108._col0=RS_92._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] + PartitionCols:_col0 + Select Operator [SEL_89] (rows=317 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_88] (rows=317 width=102) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_107] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_106] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_27_date_dim_d_date_sk_min) AND DynamicValue(RS_27_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_27_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_20] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] 
vectorized + BROADCAST [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_97] + Group By Operator [GBY_95] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_93] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_89] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1 + Select Operator [SEL_17] (rows=3442937 width=210) + Output:["_col0","_col1","_col2"] + PTF Operator [PTF_16] (rows=3442937 width=210) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] + Group By Operator [GBY_12] (rows=3442937 width=210) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0 + Group By Operator [GBY_10] (rows=95493908 width=210) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 + Merge Join Operator [MERGEJOIN_85] (rows=95493908 width=180) + Conds:RS_102._col0=RS_90._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_90] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_89] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_100] (rows=550076554 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + 
TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_sales_price"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_99] + Group By Operator [GBY_98] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + Group By Operator [GBY_94] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_91] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_89] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query52.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query52.q.out new file mode 100644 index 0000000000..72f91517ec --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query52.q.out @@ -0,0 +1,139 @@ +PREHOOK: query: explain +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + 
and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] + Select Operator [SEL_78] (rows=100 width=220) + Output:["_col0","_col1","_col2","_col3"] + Limit [LIM_77] (rows=100 width=216) + Number of rows:100 + Select Operator [SEL_76] (rows=7333 width=216) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_75] + Group By Operator [GBY_74] (rows=7333 width=216) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1 + Group By Operator [GBY_16] (rows=7333 width=216) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col5, _col6 + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=104) + Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=7333 width=107) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_63] (rows=7333 width=111) + predicate:(i_manager_id = 1) + TableScan [TS_6] (rows=462000 width=111) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_53] (rows=15062131 width=4) + Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_55] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 1998)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_71] (rows=550076554 width=114) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_dt_d_date_sk_min) AND DynamicValue(RS_10_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_dt_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_62] + Group By Operator [GBY_61] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_60] + Group By Operator [GBY_59] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_58] 
(rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_56] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_70] + Group By Operator [GBY_69] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_66] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_64] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out new file mode 100644 index 0000000000..27adc6ec66 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query53.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: explain +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - 
avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_86] + Limit [LIM_85] (rows=30 width=228) + Number of rows:100 + Select Operator [SEL_84] (rows=30 width=228) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_27] + Select Operator [SEL_24] (rows=30 width=228) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_36] (rows=30 width=228) + predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END + Select Operator [SEL_23] (rows=60 width=116) + Output:["avg_window_0","_col0","_col2"] + PTF Operator [PTF_22] (rows=60 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] + Select Operator [SEL_19] (rows=60 width=116) + Output:["_col0","_col2"] + Group By Operator [GBY_18] (rows=60 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=60 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col4, _col6 + Merge Join Operator [MERGEJOIN_62] (rows=129200 width=8) + Conds:RS_12._col0=RS_73._col0(Inner),Output:["_col2","_col4","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=317 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_71] (rows=317 width=12) + predicate:(d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 
1219, 1220, 1221, 1222, 1223) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_qoy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_61] (rows=744232 width=4) + Conds:RS_81._col1=RS_65._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=68 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_63] (rows=68 width=290) + predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants')) + TableScan [TS_3] (rows=462000 width=289) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + PartitionCols:_col1 + Select Operator [SEL_80] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_79] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_10_item_i_item_sk_min) AND DynamicValue(RS_10_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_10_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_70] + Group By Operator [GBY_69] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_66] (rows=68 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_64] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_78] + Group By Operator [GBY_77] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_74] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_72] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out new file mode 100644 index 0000000000..4a514cef9b --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -0,0 +1,441 @@ +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +PREHOOK: query: explain +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Jewelry' + and i_class = 'consignment' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 3 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 3) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 3) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: 
default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 15 (BROADCAST_EDGE) +Map 20 <- Reducer 26 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Union 21 (CONTAINS) +Map 24 <- Reducer 26 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Union 21 (CONTAINS) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 22 <- Map 25 (SIMPLE_EDGE), Union 21 (SIMPLE_EDGE) +Reducer 23 <- Map 27 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 29 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 29 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 29 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 10 vectorized + File Output Operator [FS_353] + Limit [LIM_352] (rows=1 width=16) + Number of rows:100 + Select Operator [SEL_351] (rows=1 width=16) + Output:["_col0","_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_350] + Select Operator [SEL_349] (rows=1 width=16) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_348] (rows=1 width=12) + 
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] + PartitionCols:_col0 + Group By Operator [GBY_346] (rows=1 width=12) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_345] (rows=1 width=116) + Output:["_col0"] + Group By Operator [GBY_344] (rows=1 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0 + Group By Operator [GBY_118] (rows=312 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_117] (rows=624257222 width=127) + Output:["_col0","_col1"] + Filter Operator [FIL_116] (rows=624257222 width=127) + predicate:_col2 BETWEEN _col3 AND _col4 + Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + Group By Operator [GBY_342] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:_col0 + Group By Operator [GBY_321] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_318] (rows=50 width=12) + Output:["_col0"] + Filter Operator [FIL_315] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_50] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Select Operator [SEL_104] (rows=224732600 width=119) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_272] (rows=224732600 width=119) + Conds:(Inner),Output:["_col0","_col4","_col11","_col13"] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Select Operator [SEL_340] (rows=1 width=8) + Filter 
Operator [FIL_339] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_338] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + Group By Operator [GBY_336] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_335] (rows=25 width=4) + Group By Operator [GBY_334] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Group By Operator [GBY_320] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_317] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_315] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_101] + Select Operator [SEL_85] (rows=224732600 width=119) + Output:["_col0","_col4","_col11","_col13"] + Merge Join Operator [MERGEJOIN_271] (rows=224732600 width=119) + Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_327] + Group By Operator [GBY_325] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Group By Operator [GBY_319] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_316] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_315] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_82] + Merge Join Operator [MERGEJOIN_270] (rows=8989304 width=8) + Conds:(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) + Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_268] (rows=55046 
width=4) + Conds:RS_46._col0=RS_307._col1(Inner),Output:["_col5"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_264] (rows=39720279 width=4) + Conds:RS_286._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col0"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] + PartitionCols:_col1, _col2 + Select Operator [SEL_285] (rows=40000000 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_284] (rows=40000000 width=188) + predicate:(ca_county is not null and ca_state is not null) + TableScan [TS_5] (rows=40000000 width=188) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0, _col1 + Select Operator [SEL_288] (rows=1704 width=184) + Output:["_col0","_col1"] + Filter Operator [FIL_287] (rows=1704 width=184) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_8] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col1 + Select Operator [SEL_306] (rows=55046 width=8) + Output:["_col0","_col1"] + Group By Operator [GBY_305] (rows=55046 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1 + Group By Operator [GBY_39] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col1, _col0 + Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) + Conds:RS_292._col0=RS_36._col0(Inner),Output:["_col0","_col1"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_291] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_290] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_11] (rows=80000000 width=8) + 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=110092 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) + Conds:RS_31._col2=RS_301._col0(Inner),Output:["_col1"] + <-Map 27 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_301] + PartitionCols:_col0 + Select Operator [SEL_300] (rows=453 width=4) + Output:["_col0"] + Filter Operator [FIL_299] (rows=453 width=186) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) + TableScan [TS_25] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) + Conds:Union 21._col0=RS_295._col0(Inner),Output:["_col1","_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_294] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_293] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_22] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Union 21 [SIMPLE_EDGE] + <-Map 20 [CONTAINS] vectorized + Reduce Output Operator [RS_362] + PartitionCols:_col0 + Select Operator [SEL_361] (rows=285117831 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_360] (rows=285117831 width=11) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_32_item_i_item_sk_min) AND DynamicValue(RS_32_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_32_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_274] (rows=287989836 width=11) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_355] + Group By Operator [GBY_354] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] + Group By Operator [GBY_297] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_296] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_294] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_358] + Group By Operator [GBY_357] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + Group By Operator [GBY_303] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_302] (rows=453 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_300] + <-Map 24 [CONTAINS] vectorized + Reduce Output Operator [RS_365] + PartitionCols:_col0 + Select Operator [SEL_364] (rows=143930993 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_363] (rows=143930993 width=11) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_32_item_i_item_sk_min) AND DynamicValue(RS_32_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_32_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND 
DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_279] (rows=144002668 width=11) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_356] + Please refer to the previous Group By Operator [GBY_354] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_359] + Please refer to the previous Group By Operator [GBY_357] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) + Conds:RS_312._col0=RS_314._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_311] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_310] (rows=525327388 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_77_customer_c_customer_sk_min) AND DynamicValue(RS_77_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_77_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_309] + Group By Operator [GBY_308] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_181] (rows=55046 width=8) + Output:["_col0"] 
+ Please refer to the previous Merge Join Operator [MERGEJOIN_268] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col0 + Select Operator [SEL_313] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_333] + Select Operator [SEL_332] (rows=1 width=8) + Filter Operator [FIL_331] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_330] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_329] + Group By Operator [GBY_328] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_326] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_325] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query55.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query55.q.out new file mode 100644 index 0000000000..eb6b84fd09 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query55.q.out @@ -0,0 +1,123 @@ +PREHOOK: query: explain +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=36 + and 
d_moy=12 + and d_year=2001 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] + Limit [LIM_78] (rows=100 width=220) + Number of rows:100 + Select Operator [SEL_77] (rows=7333 width=220) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] + Select Operator [SEL_75] (rows=7333 width=220) + Output:["_col1","_col2","_col3"] + Group By Operator [GBY_74] (rows=7333 width=216) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1 + Group By Operator [GBY_16] (rows=7333 width=216) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col5, _col6 + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=104) + Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=7333 width=107) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_63] (rows=7333 width=111) + predicate:(i_manager_id = 36) + TableScan [TS_6] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + 
PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_53] (rows=15062131 width=4) + Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_55] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 2001)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_71] (rows=550076554 width=114) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_62] + Group By Operator [GBY_61] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_60] + Group By Operator [GBY_59] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_58] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_56] + <-Reducer 9 [BROADCAST_EDGE] 
vectorized + BROADCAST [RS_70] + Group By Operator [GBY_69] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_66] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_64] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out new file mode 100644 index 0000000000..b57ded3f6b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out @@ -0,0 +1,470 @@ +PREHOOK: query: explain +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = 
i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('orchid','chiffon','lace')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 1 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -8 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * 
from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 17 <- Reducer 21 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 32 <- Reducer 11 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE) +Map 33 <- Reducer 14 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 11 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 2 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Map 28 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 23 <- Map 28 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 20 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 26 <- Map 28 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 2 
(SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_370] + Limit [LIM_369] (rows=100 width=212) + Number of rows:100 + Select Operator [SEL_368] (rows=430 width=212) + Output:["_col0","_col1"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_367] + Group By Operator [GBY_366] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_382] + PartitionCols:_col0 + Group By Operator [GBY_381] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_380] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_304] (rows=373066 width=100) + Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_294] (rows=17170 width=104) + Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] + PartitionCols:_col1 + Select Operator [SEL_318] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_325] + PartitionCols:_col0 + Group By Operator [GBY_324] (rows=11550 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Group By Operator [GBY_322] (rows=11550 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_321] (rows=23100 width=189) + 
Output:["i_item_id"] + Filter Operator [FIL_320] (rows=23100 width=189) + predicate:(i_color) IN ('orchid', 'chiffon', 'lace') + TableScan [TS_2] (rows=462000 width=189) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col3 + Select Operator [SEL_60] (rows=1550375 width=13) + Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) + Conds:RS_57._col1=RS_346._col0(Inner),Output:["_col2","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + PartitionCols:_col0 + Select Operator [SEL_343] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_342] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -8) + TableScan [TS_15] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) + Conds:RS_379._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_330] + PartitionCols:_col0 + Select Operator [SEL_327] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_326] (rows=50 width=12) + predicate:((d_moy = 1) and (d_year = 2000)) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_379] + PartitionCols:_col0 + Select Operator [SEL_378] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_377] (rows=285117733 width=123) + predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_58_customer_address_ca_address_sk_min) AND DynamicValue(RS_58_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_58_customer_address_ca_address_sk_bloom_filter))) 
and (cs_item_sk BETWEEN DynamicValue(RS_64_item_i_item_sk_min) AND DynamicValue(RS_64_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_64_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_45] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_376] + Group By Operator [GBY_375] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_237] (rows=17170 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Group By Operator [GBY_335] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_331] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_327] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_374] + Group By Operator [GBY_373] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_354] + Group By Operator [GBY_351] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_347] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_343] + <-Reducer 13 [CONTAINS] vectorized + Reduce Output Operator [RS_394] + PartitionCols:_col0 + Group By Operator [GBY_393] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_392] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_106] + PartitionCols:_col0 + Group By Operator [GBY_105] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_305] (rows=189670 width=190) + Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col2 + Select Operator [SEL_97] (rows=788222 width=110) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) + Conds:RS_94._col2=RS_348._col0(Inner),Output:["_col1","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_343] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) + Conds:RS_391._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_327] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_391] + PartitionCols:_col0 + Select Operator [SEL_390] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_389] (rows=143931246 width=123) + predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_95_customer_address_ca_address_sk_min) AND DynamicValue(RS_95_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_95_customer_address_ca_address_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_101_item_i_item_sk_min) AND DynamicValue(RS_101_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_101_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_82] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_388] + Group By Operator [GBY_387] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_285] + Group By Operator [GBY_284] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_283] (rows=17170 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_384] + Group By Operator [GBY_383] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + Group By Operator [GBY_336] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_333] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_327] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_386] + Group By Operator [GBY_385] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_355] + Group By Operator [GBY_352] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_343] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_365] + PartitionCols:_col0 + Group By Operator [GBY_364] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_363] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Group By Operator [GBY_32] (rows=430 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_303] (rows=692265 width=100) + Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col2 + Select 
Operator [SEL_24] (rows=2876890 width=4) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) + Conds:RS_21._col2=RS_344._col0(Inner),Output:["_col1","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_343] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) + Conds:RS_362._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_328] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_327] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_362] + PartitionCols:_col0 + Select Operator [SEL_361] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_360] (rows=525327191 width=118) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_341] + Group By Operator [GBY_340] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + Group By Operator [GBY_334] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_329] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_327] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_357] + Group By Operator [GBY_356] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_353] + Group By Operator [GBY_350] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_345] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_343] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_359] + Group By Operator [GBY_358] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_201] (rows=17170 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out new file mode 100644 index 0000000000..0ab124e144 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out @@ -0,0 +1,262 @@ +PREHOOK: query: explain +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Reducer 5 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_324] + Limit [LIM_323] (rows=100 width=758) + Number of rows:100 + Select Operator [SEL_322] (rows=130121 width=758) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_113] + Select Operator [SEL_112] (rows=130121 width=758) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_281] (rows=130121 width=646) + Conds:RS_109._col5, _col6, _col12, _col7=RS_309._col0, _col1, _col4, _col2(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0, _col1, _col4, _col2 + Select Operator [SEL_307] 
(rows=87441185 width=404) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_305] (rows=87441185 width=408) + predicate:rank_window_0 is not null + PTF Operator [PTF_303] (rows=87441185 width=408) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] + Select Operator [SEL_302] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col1, _col0, _col4 + Group By Operator [GBY_299] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_95] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col0, _col1, _col4, _col5, _col7 + Merge Join Operator [MERGEJOIN_279] (rows=87441185 width=406) + Conds:RS_91._col2=RS_298._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col0 + Select Operator [SEL_297] (rows=60 width=102) + Output:["_col0","_col1"] + Filter Operator [FIL_296] (rows=60 width=102) + predicate:cc_name is not null + TableScan [TS_88] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col2 + Select Operator [SEL_87] (rows=87441185 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_278] (rows=87441185 width=312) + Conds:RS_84._col2=RS_295._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + 
PartitionCols:_col0 + Select Operator [SEL_294] (rows=462000 width=194) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_293] (rows=462000 width=194) + predicate:(i_brand is not null and i_category is not null) + TableScan [TS_78] (rows=462000 width=194) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_84] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_277] (rows=87441185 width=126) + Conds:RS_292._col0=RS_284._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=564 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_282] (rows=564 width=12) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001)) + TableScan [TS_75] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_291] (rows=285117980 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_290] (rows=285117980 width=123) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_82_date_dim_d_date_sk_min) AND DynamicValue(RS_82_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_82_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_72] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + Group By Operator [GBY_286] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_285] (rows=564 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_283] + <-Reducer 9 [ONE_TO_ONE_EDGE] + FORWARD [RS_109] + PartitionCols:_col5, _col6, _col12, _col7 + Merge Join Operator [MERGEJOIN_280] (rows=130121 width=636) + Conds:RS_310._col0, _col1, _col4, _col2=RS_321._col0, _col1, _col7, _col2(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] + PartitionCols:_col0, _col1, _col4, _col2 + Select Operator [SEL_308] (rows=87441185 width=404) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_306] (rows=87441185 width=408) + predicate:rank_window_0 is not null + PTF Operator [PTF_304] (rows=87441185 width=408) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + PartitionCols:_col0, _col1, _col7, _col2 + Select Operator [SEL_320] (rows=130121 width=524) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_319] (rows=130121 width=524) + predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END + Select Operator [SEL_318] (rows=260242 width=520) + Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_317] (rows=260242 width=520) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) + PTF Operator [PTF_316] (rows=87441185 width=520) + Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5"}] + Select Operator [SEL_315] (rows=87441185 width=520) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col1, _col0, _col4 + Select Operator [SEL_313] (rows=87441185 width=408) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_312] (rows=87441185 width=408) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col2"}] + Select Operator [SEL_311] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + PartitionCols:_col1, _col0, _col4, _col2 + Please refer to the previous Group By Operator [GBY_299] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out new file mode 100644 index 0000000000..ac9e4123f5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out @@ -0,0 +1,397 @@ +Warning: Shuffle Join MERGEJOIN[401][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +PREHOOK: query: explain +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select 
d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ss_sold_date_sk = d_date_sk + group by 
i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '1998-02-19')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 17 (BROADCAST_EDGE) +Map 26 <- Reducer 18 (BROADCAST_EDGE) +Map 27 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 27 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 24 (ONE_TO_ONE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 25 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 26 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_466] + Limit [LIM_465] (rows=1 width=884) + Number of rows:100 + Select Operator [SEL_464] (rows=1 width=884) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_160] + Select Operator [SEL_159] (rows=1 width=884) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_155] (rows=1 width=1108) + predicate:(_col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col10 AND _col11 and _col9 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col6 AND _col7) + Merge Join Operator [MERGEJOIN_416] (rows=1 width=1108) + 
Conds:RS_152._col0=RS_463._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col9","_col10","_col11"] + <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_463] + PartitionCols:_col0 + Select Operator [SEL_462] (rows=69 width=436) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_461] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_144] + PartitionCols:_col0 + Group By Operator [GBY_143] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_414] (rows=31537 width=100) + Conds:RS_139._col0=RS_140._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_140] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_403] (rows=2 width=4) + Conds:RS_419._col1=RS_435._col0(Inner),Output:["_col0"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_419] + PartitionCols:_col1 + Select Operator [SEL_418] (rows=73049 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_417] (rows=73049 width=98) + predicate:d_date is not null + TableScan [TS_5] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 24 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_435] + PartitionCols:_col0 + Group By Operator [GBY_434] (rows=2 width=94) + Output:["_col0"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=2 width=94) + Output:["_col0"],keys:_col2 + Merge Join Operator [MERGEJOIN_402] (rows=5 width=94) + Conds:RS_26._col1=RS_432._col1(Inner),Output:["_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_432] + PartitionCols:_col1 + Select Operator [SEL_430] (rows=73049 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_428] (rows=73049 width=98) + predicate:(d_date is not null and d_week_seq is not null) + TableScan [TS_20] 
(rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_401] (rows=1 width=4) + Conds:(Inner),Output:["_col1"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_433] + Select Operator [SEL_431] (rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_429] (rows=1 width=98) + predicate:((d_date = '1998-02-19') and d_week_seq is not null) + Please refer to the previous TableScan [TS_20] + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_427] + Select Operator [SEL_426] (rows=1 width=8) + Filter Operator [FIL_425] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_424] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_423] + Group By Operator [GBY_422] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_421] (rows=1 width=94) + Filter Operator [FIL_420] (rows=1 width=94) + predicate:(d_date = '1998-02-19') + TableScan [TS_8] (rows=73049 width=94) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_139] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_408] (rows=143966864 width=215) + Conds:RS_460._col1=RS_444._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_444] + PartitionCols:_col0 + Select Operator [SEL_441] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_460] + PartitionCols:_col1 + Select Operator [SEL_459] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_458] (rows=143966864 width=119) + 
predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_140_date_dim_d_date_sk_min) AND DynamicValue(RS_140_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_140_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_98] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_457] + Group By Operator [GBY_456] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_360] + Group By Operator [GBY_359] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_358] (rows=2 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_403] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_152] + PartitionCols:_col0 + Filter Operator [FIL_150] (rows=1 width=772) + predicate:(_col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col2 AND _col3) + Merge Join Operator [MERGEJOIN_415] (rows=68 width=772) + Conds:RS_447._col0=RS_455._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7"] + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_455] + PartitionCols:_col0 + Select Operator [SEL_454] (rows=69 width=436) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_453] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col0 + Group By Operator [GBY_94] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_413] (rows=120498 width=100) + 
Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_403] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_404] (rows=550076554 width=210) + Conds:RS_452._col1=RS_443._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_441] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_452] + PartitionCols:_col1 + Select Operator [SEL_451] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_450] (rows=550076554 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_91_date_dim_d_date_sk_min) AND DynamicValue(RS_91_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_91_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_49] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_449] + Group By Operator [GBY_448] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_314] + Group By Operator [GBY_313] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_312] (rows=2 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_403] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_447] + PartitionCols:_col0 + Select Operator [SEL_446] (rows=68 width=436) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_445] 
(rows=68 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Group By Operator [GBY_45] (rows=68 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_412] (rows=62327 width=100) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_403] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_400] (rows=286549727 width=215) + Conds:RS_440._col1=RS_442._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_442] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_441] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_440] + PartitionCols:_col1 + Select Operator [SEL_439] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_438] (rows=286549727 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_437] + Group By Operator [GBY_436] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_264] + Group By Operator [GBY_263] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_262] (rows=2 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_403] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out new file mode 100644 index 0000000000..f744013e32 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: explain +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales 
tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales1,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185 and 1185 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + 
,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1185+ 12 and 1185 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_208] + Limit [LIM_207] (rows=100 width=976) + Number of rows:100 + Select Operator [SEL_206] (rows=1012347 width=976) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_58] + Select Operator [SEL_57] (rows=1012347 width=976) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_182] (rows=1012347 width=1648) + Conds:RS_54._col2, _col1=RS_55._col0, _col7(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Reducer 11 
[SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0, _col7 + Select Operator [SEL_53] (rows=28847 width=776) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_181] (rows=28847 width=776) + Conds:RS_50._col1=RS_205._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_45] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_180] (rows=28847 width=676) + Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] + PartitionCols:_col0 + Select Operator [SEL_197] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_195] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) + TableScan [TS_15] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Group By Operator [GBY_202] (rows=1196832 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1 + Group By Operator [GBY_38] (rows=525329897 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_36] (rows=525329897 
width=138) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_179] (rows=525329897 width=138) + Conds:RS_186._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_183] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_187] (rows=73049 width=99) + predicate:d_week_seq is not null + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col2, _col1 + Select Operator [SEL_26] (rows=28847 width=976) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_178] (rows=28847 width=976) + Conds:RS_23._col1=RS_201._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] + PartitionCols:_col0 + Select Operator [SEL_200] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_18] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=28847 width=788) + 
Conds:RS_193._col0=RS_198._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0 + Select Operator [SEL_196] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_194] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) + Please refer to the previous TableScan [TS_15] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0 + Group By Operator [GBY_192] (rows=1196832 width=791) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=525329897 width=791) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_9] (rows=525329897 width=142) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_176] (rows=525329897 width=142) + Conds:RS_185._col0=RS_190._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_184] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_188] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Please refer to the previous Filter Operator [FIL_187] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out new file mode 100644 index 0000000000..0962d0cf84 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out @@ -0,0 +1,235 @@ +Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2000 + and d_moy = 2 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales 
+POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 16 (BROADCAST_EDGE) +Map 15 <- Reducer 12 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (XPROD_EDGE), Reducer 14 (XPROD_EDGE) +Reducer 13 <- Map 10 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 15 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_233] + Limit [LIM_232] (rows=1 width=94) + Number of rows:100 + Select Operator [SEL_231] (rows=1 width=94) + Output:["_col0","_col1"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + Filter Operator [FIL_229] (rows=1 width=94) + predicate:(_col1 >= 10L) + Group By Operator [GBY_228] (rows=1 width=94) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=1 width=94) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_67] (rows=95896 width=309) + Output:["_col0"] + Filter Operator [FIL_66] (rows=95896 width=309) + predicate:(_col1 > _col4) + Map Join Operator [MAPJOIN_179] (rows=287690 width=309) + Conds:SEL_55._col2=RS_227._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col4"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_227] + PartitionCols:_col0 + Select Operator [SEL_226] (rows=10 width=202) + Output:["_col0","_col1"] + Group By Operator [GBY_225] (rows=10 width=210) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Group By Operator [GBY_223] (rows=10 width=210) + Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category + Filter Operator [FIL_222] (rows=462000 width=201) + predicate:i_category is not null + TableScan [TS_56] (rows=462000 width=201) + default@item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_current_price","i_category"] + <-Select Operator [SEL_55] (rows=287690 width=287) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_178] (rows=287690 width=287) + Conds:MERGEJOIN_177._col0=RS_205._col0(Inner),HybridGraceHashJoin:true,Output:["_col4","_col5","_col9"] + <-Map 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_205] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=660 width=4) + Output:["_col0"] + Map Join Operator [MAPJOIN_203] (rows=660 width=4) + Conds:RS_42._col0=SEL_202._col1(Inner),HybridGraceHashJoin:true,Output:["_col2"] + <-Reducer 12 [BROADCAST_EDGE] + BROADCAST [RS_42] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_175] (rows=25 width=4) + Conds:(Inner),Output:["_col0"] + <-Reducer 11 [XPROD_EDGE] vectorized + XPROD_EDGE [RS_192] + Group By Operator [GBY_191] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Group By Operator [GBY_187] (rows=25 width=4) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_185] (rows=50 width=12) + Output:["d_month_seq"] + Filter Operator [FIL_183] (rows=50 width=12) + predicate:((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) + TableScan [TS_15] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 14 [XPROD_EDGE] vectorized + XPROD_EDGE [RS_200] + Select Operator [SEL_199] (rows=1 width=8) + 
Filter Operator [FIL_198] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_197] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_196] + Group By Operator [GBY_195] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_194] (rows=25 width=4) + Group By Operator [GBY_193] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Group By Operator [GBY_188] (rows=25 width=4) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_186] (rows=50 width=12) + Output:["d_month_seq"] + Filter Operator [FIL_184] (rows=50 width=12) + predicate:((d_moy = 2) and (d_year = 2000)) + Please refer to the previous TableScan [TS_15] + <-Select Operator [SEL_202] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_201] (rows=73049 width=8) + predicate:d_month_seq is not null + TableScan [TS_36] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Merge Join Operator [MERGEJOIN_177] (rows=525327388 width=291) + Conds:RS_49._col2=RS_50._col0(Inner),Output:["_col0","_col4","_col5","_col9"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_173] (rows=525327388 width=209) + Conds:RS_213._col1=RS_216._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col1 + Select Operator [SEL_212] (rows=525327388 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_211] (rows=525327388 width=11) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_53_d_d_date_sk_min) AND DynamicValue(RS_53_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_53_d_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + 
TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=462000 width=205) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_214] (rows=462000 width=205) + predicate:i_category is not null + TableScan [TS_3] (rows=462000 width=205) + default@item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_category"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=80000000 width=90) + Conds:RS_219._col1=RS_221._col0(Inner),Output:["_col0","_col3"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + PartitionCols:_col1 + Select Operator [SEL_218] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_217] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_6] (rows=80000000 width=8) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=40000000 width=90) + Output:["_col0","_col1"] + TableScan [TS_9] (rows=40000000 width=90) + default@customer_address,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out new file mode 100644 index 0000000000..cb94e4fddf --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out @@ -0,0 +1,496 @@ +PREHOOK: query: 
explain +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and 
ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 17 <- Reducer 21 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 32 <- Reducer 11 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE) +Map 33 <- Reducer 14 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 11 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 2 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Map 28 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 23 <- Map 28 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 20 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 26 <- Map 28 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_375] + Limit [LIM_374] (rows=100 width=212) + Number of rows:100 + Select Operator [SEL_373] (rows=1717 width=212) + Output:["_col0","_col1"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_372] + Group By Operator [GBY_371] (rows=1717 width=212) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_388] + PartitionCols:_col0 + Group By Operator [GBY_387] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_386] (rows=5151 width=212) + keys:_col0,sort order:+,top n:100 + Group By Operator [GBY_385] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_305] (rows=746132 width=100) + Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_295] (rows=34340 width=104) + Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col1 + Select Operator [SEL_322] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_329] + PartitionCols:_col0 + Group By Operator [GBY_328] (rows=23100 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + PartitionCols:_col0 + Group By Operator [GBY_326] (rows=23100 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_325] (rows=46200 width=190) + Output:["i_item_id"] + Filter Operator [FIL_324] (rows=46200 width=190) + predicate:(i_category = 'Children') + TableScan [TS_2] (rows=462000 width=190) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_65] + 
PartitionCols:_col3 + Select Operator [SEL_60] (rows=1550375 width=13) + Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_300] (rows=1550375 width=13) + Conds:RS_57._col1=RS_350._col0(Inner),Output:["_col2","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + PartitionCols:_col0 + Select Operator [SEL_347] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_346] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -6) + TableScan [TS_15] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_299] (rows=7751872 width=98) + Conds:RS_384._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_330] (rows=50 width=12) + predicate:((d_moy = 9) and (d_year = 1999)) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_382] (rows=285117733 width=123) + predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_58_customer_address_ca_address_sk_min) AND DynamicValue(RS_58_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_58_customer_address_ca_address_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_64_item_i_item_sk_min) AND DynamicValue(RS_64_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_64_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND 
DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_45] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_381] + Group By Operator [GBY_380] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_238] (rows=34340 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_295] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_377] + Group By Operator [GBY_376] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_339] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_335] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_331] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_379] + Group By Operator [GBY_378] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_358] + Group By Operator [GBY_355] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_351] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + <-Reducer 13 [CONTAINS] vectorized + Reduce Output Operator [RS_401] + PartitionCols:_col0 + Group By Operator [GBY_400] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_399] (rows=5151 width=212) + keys:_col0,sort order:+,top n:100 + Group By Operator [GBY_398] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_106] + PartitionCols:_col0 + Group By Operator [GBY_105] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_306] (rows=379339 width=201) + Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_295] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col2 + Select Operator [SEL_97] (rows=788222 width=110) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_303] (rows=788222 width=110) + Conds:RS_94._col2=RS_352._col0(Inner),Output:["_col1","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_352] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_347] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_302] (rows=3941109 width=118) + Conds:RS_397._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_397] + PartitionCols:_col0 + 
Select Operator [SEL_396] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_395] (rows=143931246 width=123) + predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_95_customer_address_ca_address_sk_min) AND DynamicValue(RS_95_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_95_customer_address_ca_address_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_101_item_i_item_sk_min) AND DynamicValue(RS_101_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_101_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_82] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_394] + Group By Operator [GBY_393] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_284] (rows=34340 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_295] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_390] + Group By Operator [GBY_389] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + 
Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_337] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_331] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_392] + Group By Operator [GBY_391] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_359] + Group By Operator [GBY_356] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_353] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_370] + PartitionCols:_col0 + Group By Operator [GBY_369] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_368] (rows=5151 width=212) + keys:_col0,sort order:+,top n:100 + Group By Operator [GBY_367] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Group By Operator [GBY_32] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_304] (rows=1384530 width=100) + Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_295] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col2 + Select Operator [SEL_24] (rows=2876890 width=4) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_297] (rows=2876890 width=4) 
+ Conds:RS_21._col2=RS_348._col0(Inner),Output:["_col1","_col3"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_347] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_296] (rows=14384447 width=4) + Conds:RS_366._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_366] + PartitionCols:_col0 + Select Operator [SEL_365] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_364] (rows=525327191 width=118) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_345] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_333] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_331] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_361] + Group By Operator [GBY_360] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_357] + Group By Operator [GBY_354] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_347] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_363] + Group By Operator [GBY_362] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_202] (rows=34340 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_295] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out new file mode 100644 index 0000000000..0ce574f38e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out @@ -0,0 +1,394 @@ +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 9' is a cross product +PREHOOK: 
query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or 
p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) +Map 30 <- Reducer 17 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 11 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 19 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 22 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) 
+Reducer 4 <- Map 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 23 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 26 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 29 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 10 vectorized + File Output Operator [FS_343] + Limit [LIM_342] (rows=1 width=336) + Number of rows:100 + Select Operator [SEL_341] (rows=1 width=336) + Output:["_col0","_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_90] + Select Operator [SEL_89] (rows=1 width=336) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_273] (rows=1 width=448) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Select Operator [SEL_339] (rows=1 width=224) + Output:["_col0","_col1"] + Group By Operator [GBY_338] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_83] + Group By Operator [GBY_82] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col1)"] + Merge Join Operator [MERGEJOIN_272] (rows=529208 width=0) + Conds:RS_78._col0=RS_302._col0(Inner),Output:["_col1"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] + PartitionCols:_col0 + Select Operator [SEL_299] (rows=46200 width=4) + Output:["_col0"] + Filter Operator [FIL_298] (rows=46200 width=94) + predicate:(i_category = 'Electronics') + TableScan [TS_29] (rows=462000 width=94) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=549816 width=4) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_271] (rows=549816 width=4) + 
Conds:RS_71._col0=RS_72._col1(Inner),Output:["_col3","_col5"] + <-Reducer 24 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_71] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_268] (rows=16000001 width=4) + Conds:RS_319._col1=RS_323._col0(Inner),Output:["_col0"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] + PartitionCols:_col1 + Select Operator [SEL_317] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_316] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_19] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Select Operator [SEL_321] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_320] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -7) + TableScan [TS_32] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Select Operator [SEL_67] (rows=2749079 width=4) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_270] (rows=2749079 width=4) + Conds:RS_64._col3=RS_278._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_278] + PartitionCols:_col0 + Select Operator [SEL_275] (rows=341 width=4) + Output:["_col0"] + Filter Operator [FIL_274] (rows=341 width=115) + predicate:(s_gmt_offset = -7) + TableScan [TS_3] (rows=1704 width=115) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_gmt_offset"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_269] (rows=13737330 width=4) + Conds:RS_337._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_290] + PartitionCols:_col0 + Select Operator 
[SEL_287] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_286] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 1999)) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=501694138 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_335] (rows=501694138 width=122) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_71_customer_c_customer_sk_min) AND DynamicValue(RS_71_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_71_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_79_item_i_item_sk_min) AND DynamicValue(RS_79_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_79_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_62_date_dim_d_date_sk_min) AND DynamicValue(RS_62_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_62_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_65_store_s_store_sk_min) AND DynamicValue(RS_65_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_65_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_52] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_330] + Group By Operator [GBY_329] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_283] + Group By Operator [GBY_281] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_279] (rows=341 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_275] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_328] + Group By Operator [GBY_327] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + Group By Operator [GBY_293] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_291] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_332] + Group By Operator [GBY_331] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14591048)"] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14591048)"] + Select Operator [SEL_237] (rows=16000001 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_268] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_334] + Group By Operator [GBY_333] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_303] 
(rows=46200 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_326] + Select Operator [SEL_325] (rows=1 width=224) + Output:["_col0","_col1"] + Group By Operator [GBY_324] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_43] + Group By Operator [GBY_42] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col1)"] + Merge Join Operator [MERGEJOIN_267] (rows=505397 width=0) + Conds:RS_38._col2=RS_322._col0(Inner),Output:["_col1"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_266] (rows=2526983 width=4) + Conds:RS_35._col0=RS_300._col0(Inner),Output:["_col1","_col2"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_299] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=2625387 width=8) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_265] (rows=2625387 width=8) + Conds:RS_25._col2=RS_318._col0(Inner),Output:["_col1","_col3","_col6"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_317] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_264] (rows=2625387 width=4) + Conds:RS_22._col0=RS_288._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_288] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_287] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=95880426 width=97) + 
Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_263] (rows=95880426 width=97) + Conds:RS_12._col4=RS_315._col0(Inner),Output:["_col0","_col1","_col2","_col5"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] + PartitionCols:_col0 + Select Operator [SEL_314] (rows=2300 width=4) + Output:["_col0"] + Filter Operator [FIL_313] (rows=2300 width=259) + predicate:((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) + TableScan [TS_6] (rows=2300 width=259) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_dmail","p_channel_email","p_channel_tv"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_262] (rows=95880426 width=100) + Conds:RS_312._col3=RS_276._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_276] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_275] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col3 + Select Operator [SEL_311] (rows=479120970 width=126) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_310] (rows=479120970 width=126) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_36_item_i_item_sk_min) AND DynamicValue(RS_36_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_36_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_10_store_s_store_sk_min) AND DynamicValue(RS_10_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_10_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 
width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_promo_sk","ss_ext_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_285] + Group By Operator [GBY_284] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] + Group By Operator [GBY_280] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_277] (rows=341 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_275] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_297] + Group By Operator [GBY_296] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + Group By Operator [GBY_292] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_289] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_309] + Group By Operator [GBY_308] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + Group By Operator [GBY_304] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=46200 width=4) + Output:["_col0"] + Please refer to the previous Select Operator 
[SEL_299] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out new file mode 100644 index 0000000000..a1f8413eb0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query63.q.out @@ -0,0 +1,163 @@ +PREHOOK: query: explain +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in 
(1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_86] + Limit [LIM_85] (rows=71 width=228) + Number of rows:100 + Select Operator [SEL_84] (rows=71 width=228) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_27] + Select Operator [SEL_24] (rows=71 width=228) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_36] (rows=71 width=228) + predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END + Select Operator [SEL_23] (rows=143 width=116) + Output:["avg_window_0","_col0","_col2"] + PTF 
Operator [PTF_22] (rows=143 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] + Select Operator [SEL_19] (rows=143 width=116) + Output:["_col0","_col2"] + Group By Operator [GBY_18] (rows=143 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=143 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col4, _col6 + Merge Join Operator [MERGEJOIN_62] (rows=129200 width=8) + Conds:RS_12._col0=RS_73._col0(Inner),Output:["_col2","_col4","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_72] (rows=317 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_71] (rows=317 width=12) + predicate:(d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_61] (rows=744232 width=4) + Conds:RS_81._col1=RS_65._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=68 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_63] (rows=68 width=290) + predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 
'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants')) + TableScan [TS_3] (rows=462000 width=289) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + PartitionCols:_col1 + Select Operator [SEL_80] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_79] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_10_item_i_item_sk_min) AND DynamicValue(RS_10_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_10_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_70] + Group By Operator [GBY_69] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_66] (rows=68 width=4) 
+ Output:["_col0"] + Please refer to the previous Select Operator [SEL_64] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_78] + Group By Operator [GBY_77] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_74] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_72] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out new file mode 100644 index 0000000000..66402f415b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out @@ -0,0 +1,816 @@ +Warning: Shuffle Join MERGEJOIN[932][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[933][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[947][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[948][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 25' is a cross product +PREHOOK: query: explain +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + 
,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + 
,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_streen_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + 
,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and + i_current_price between 35 and 35 + 10 and + i_current_price between 35 + 1 and 35 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_streen_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 + ,cs1.s2 + ,cs1.s3 + ,cs2.s1 + ,cs2.s2 + ,cs2.s3 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2000 and + cs2.syear = 2000 + 1 and + cs2.cnt <= cs1.cnt and + 
cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 38 (BROADCAST_EDGE), Reducer 44 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE) +Map 35 <- Reducer 52 (BROADCAST_EDGE) +Map 54 <- Reducer 42 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE) +Map 55 <- Reducer 52 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE) +Reducer 10 <- Map 49 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 48 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 49 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 50 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 51 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 50 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 41 (ONE_TO_ONE_EDGE) +Reducer 22 <- Map 43 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 46 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 23 (CUSTOM_SIMPLE_EDGE) +Reducer 25 
<- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 47 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 48 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Map 49 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 48 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 37 (ONE_TO_ONE_EDGE) +Reducer 30 <- Map 49 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 50 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 51 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 50 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 43 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 41 <- Reducer 40 (SIMPLE_EDGE) +Reducer 42 <- Reducer 41 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 43 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 43 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 46 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 52 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 47 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 48 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 18 vectorized + File Output Operator [FS_1067] + Select Operator [SEL_1066] (rows=3074457345618258602 width=3) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_207] + Select Operator [SEL_206] (rows=3074457345618258602 width=3) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] + Filter Operator [FIL_205] (rows=3074457345618258602 width=3) + predicate:(_col19 <= _col12) + Merge Join Operator [MERGEJOIN_957] (rows=9223372036854775807 width=1) + Conds:RS_1042._col2, _col1, _col3=RS_1065._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1042] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_1041] (rows=1914456248823429 width=1354) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Group By Operator [GBY_1040] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Group By Operator [GBY_97] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col6, _col13, _col7, _col4, _col5, _col9, _col10, _col11, _col12, _col14, _col16, _col17, _col18, _col19 + Merge Join Operator [MERGEJOIN_941] (rows=1914456248823429 
width=1353) + Conds:RS_93._col8=RS_1037._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1037] + PartitionCols:_col0 + Select Operator [SEL_1035] (rows=40000000 width=365) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_76] (rows=40000000 width=365) + default@customer_address,ad1,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_940] (rows=1914456248823429 width=996) + Conds:RS_90._col0=RS_960._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_960] + PartitionCols:_col0 + Select Operator [SEL_959] (rows=518 width=111) + Output:["_col0","_col1"] + Filter Operator [FIL_958] (rows=518 width=312) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50) + TableScan [TS_85] (rows=462000 width=311) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_939] (rows=177398042779540896 width=51) + Conds:RS_81._col1=RS_1036._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col13","_col14","_col15","_col16"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1036] + PartitionCols:_col0 + Please refer to 
the previous Select Operator [SEL_1035] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_938] (rows=177398042779540896 width=51) + Conds:RS_78._col9=RS_1032._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1032] + PartitionCols:_col0 + Select Operator [SEL_1030] (rows=7200 width=4) + Output:["_col0"] + Filter Operator [FIL_1029] (rows=7200 width=8) + predicate:hd_income_band_sk is not null + TableScan [TS_60] (rows=7200 width=8) + default@household_demographics,hd1,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col9 + Select Operator [SEL_72] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_71] (rows=177398042779540896 width=51) + predicate:(_col13 <> _col16) + Merge Join Operator [MERGEJOIN_937] (rows=177398042779540896 width=51) + Conds:RS_68._col10=RS_1026._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col16"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1026] + PartitionCols:_col0 + Select Operator [SEL_1024] (rows=1861800 width=89) + Output:["_col0","_col1"] + TableScan [TS_51] (rows=1861800 width=89) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_936] (rows=174924398677811552 width=52) + Conds:RS_65._col1=RS_1031._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1031] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + 
<-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col1 + Select Operator [SEL_59] (rows=174924398677811552 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_935] (rows=174924398677811552 width=52) + Conds:RS_56._col2=RS_1025._col0(Inner),Output:["_col0","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col21"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1025] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_934] (rows=172485247150217472 width=53) + Conds:RS_53._col1, _col10, _col8=RS_1022._col0, _col4, _col5(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1022] + PartitionCols:_col0, _col4, _col5 + Select Operator [SEL_1021] (rows=69376329 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1020] (rows=69376329 width=23) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) + TableScan [TS_48] (rows=80000000 width=23) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1, _col10, _col8 + Select Operator [SEL_47] (rows=172485247150217474 width=53) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_933] (rows=172485247150217474 
width=53) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_996] + Select Operator [SEL_990] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_44] + Merge Join Operator [MERGEJOIN_932] (rows=2361226671826 width=540) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_995] + Select Operator [SEL_989] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_931] (rows=32323874 width=328) + Conds:RS_38._col6=RS_1018._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13"] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1018] + PartitionCols:_col0 + Select Operator [SEL_1017] (rows=1704 width=181) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1016] (rows=1704 width=181) + predicate:(s_store_name is not null and s_zip is not null) + TableScan [TS_28] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_930] (rows=32323874 width=153) + Conds:RS_35._col0=RS_1000._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 43 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1000] + PartitionCols:_col0 + Select Operator [SEL_994] (rows=652 width=4) + Output:["_col0"] + Filter Operator 
[FIL_988] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_25] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_24] (rows=90527915 width=289) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_929] (rows=90527915 width=289) + Conds:RS_21._col1=RS_982._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Reducer 37 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_982] + PartitionCols:_col0 + Select Operator [SEL_981] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_980] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_979] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_928] (rows=183085709 width=227) + Conds:RS_975._col0, _col1=RS_977._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_977] + PartitionCols:_col0, _col1 + Select Operator [SEL_976] (rows=28798881 width=120) + Output:["_col0","_col1","_col2"] + TableScan [TS_7] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_975] + PartitionCols:_col0, _col1 + Select Operator [SEL_974] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_973] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND 
DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) + TableScan [TS_5] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_970] + Group By Operator [GBY_968] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_966] + Group By Operator [GBY_964] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_961] (rows=518 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_959] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_927] (rows=427216373 width=348) + Conds:RS_1012._col1, _col7=RS_1014._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1014] + PartitionCols:_col0, _col1 + Select Operator [SEL_1013] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1012] + PartitionCols:_col1, _col7 + Select Operator [SEL_1011] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_1010] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_22_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_22_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_22_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_969] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 38 [BROADCAST_EDGE] vectorized + BROADCAST [RS_987] + Group By Operator [GBY_986] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_985] + Group By Operator [GBY_984] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_983] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_981] + <-Reducer 44 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1009] + Group By Operator [GBY_1008] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_1006] + Group By Operator [GBY_1004] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1001] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_994] + <-Reducer 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1065] + PartitionCols:_col1, _col0, _col2 + Select Operator [SEL_1064] (rows=1914456248823429 width=525) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_1063] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_199] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Group By Operator [GBY_198] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col6, _col13, _col7, _col4, _col5, _col9, _col10, _col11, _col12, _col14, _col16, _col17, _col18, _col19 + Merge Join Operator [MERGEJOIN_956] (rows=1914456248823429 width=1353) + Conds:RS_194._col8=RS_1039._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1039] + PartitionCols:_col0 + Please refer to the previous Select 
Operator [SEL_1035] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_194] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_955] (rows=1914456248823429 width=996) + Conds:RS_191._col0=RS_962._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_962] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_959] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_185] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_954] (rows=177398042779540896 width=51) + Conds:RS_182._col1=RS_1038._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col13","_col14","_col15","_col16"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1038] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1035] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_182] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_953] (rows=177398042779540896 width=51) + Conds:RS_179._col9=RS_1034._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1034] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_179] + PartitionCols:_col9 + Select Operator [SEL_173] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_172] (rows=177398042779540896 width=51) + predicate:(_col13 <> _col16) + Merge Join Operator [MERGEJOIN_952] (rows=177398042779540896 width=51) + 
Conds:RS_169._col10=RS_1028._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col16"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1028] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_169] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_951] (rows=174924398677811552 width=52) + Conds:RS_166._col1=RS_1033._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1033] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_166] + PartitionCols:_col1 + Select Operator [SEL_160] (rows=174924398677811552 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_950] (rows=174924398677811552 width=52) + Conds:RS_157._col2=RS_1027._col0(Inner),Output:["_col0","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col21"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1027] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_157] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_949] (rows=172485247150217472 width=53) + Conds:RS_154._col1, _col10, _col8=RS_1023._col0, _col4, _col5(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1023] + PartitionCols:_col0, _col4, _col5 + Please refer to the previous Select Operator [SEL_1021] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_154] + PartitionCols:_col1, _col10, _col8 + Select Operator [SEL_148] 
(rows=172485247150217474 width=53) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_948] (rows=172485247150217474 width=53) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_999] + Select Operator [SEL_993] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_145] + Merge Join Operator [MERGEJOIN_947] (rows=2361226671826 width=540) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_998] + Select Operator [SEL_992] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_142] + Merge Join Operator [MERGEJOIN_946] (rows=32323874 width=328) + Conds:RS_139._col6=RS_1019._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13"] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1019] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1017] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_139] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_945] (rows=32323874 width=153) + Conds:RS_136._col0=RS_1002._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 43 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1002] + PartitionCols:_col0 + Select Operator [SEL_997] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_991] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_25] + 
<-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=90527915 width=289) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_944] (rows=90527915 width=289) + Conds:RS_122._col1=RS_1052._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Reducer 41 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1052] + PartitionCols:_col0 + Select Operator [SEL_1051] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1050] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1049] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_115] + PartitionCols:_col0 + Group By Operator [GBY_114] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_943] (rows=183085709 width=227) + Conds:RS_1048._col0, _col1=RS_978._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_978] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_976] + <-Map 55 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1048] + PartitionCols:_col0, _col1 + Select Operator [SEL_1047] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1046] (rows=287989836 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_192_item_i_item_sk_min) AND DynamicValue(RS_192_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_192_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter)))) + TableScan [TS_106] (rows=287989836 width=119) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_972] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1045] + Group By Operator [GBY_1043] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_967] + Group By Operator [GBY_965] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_963] (rows=518 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_959] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_122] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_942] (rows=427216373 width=348) + Conds:RS_1062._col1, _col7=RS_1015._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1015] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1013] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1062] + PartitionCols:_col1, _col7 + Select Operator [SEL_1061] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_1060] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_123_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_123_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_123_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_192_item_i_item_sk_min) AND DynamicValue(RS_192_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_192_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_137_d1_d_date_sk_min) AND DynamicValue(RS_137_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_137_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_101] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_971] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1044] + Please refer to the previous Group By Operator [GBY_1043] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1057] + Group By Operator [GBY_1056] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1055] + Group By Operator [GBY_1054] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1053] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1051] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1059] + Group By Operator [GBY_1058] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1007] + Group By Operator [GBY_1005] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1003] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_997] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out new file mode 100644 index 0000000000..87d0f6fc87 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: explain +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from 
store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 14 <- Reducer 13 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_173] + Limit [LIM_172] (rows=100 width=705) + Number of rows:100 + Select Operator [SEL_171] (rows=65392 width=704) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_48] + Select Operator [SEL_47] (rows=65392 width=704) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_134] (rows=65392 width=704) + Conds:RS_44._col1=RS_170._col0(Inner),Output:["_col2","_col6","_col8","_col9","_col10","_col11"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + PartitionCols:_col0 + Select Operator [SEL_169] (rows=462000 width=511) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_35] (rows=462000 width=511) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_133] (rows=65392 width=204) + Conds:RS_41._col0=RS_168._col0(Inner),Output:["_col1","_col2","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] + PartitionCols:_col0 + Select Operator [SEL_167] (rows=1704 width=92) + Output:["_col0","_col1"] + TableScan [TS_33] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_41] + PartitionCols:_col0 + Filter Operator [FIL_40] (rows=65392 width=231) + predicate:(_col2 <= _col4) + Merge Join Operator [MERGEJOIN_132] (rows=196176 width=231) + Conds:RS_151._col0=RS_166._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_151] + PartitionCols:_col0 + Group By Operator [GBY_150] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_130] (rows=91197860 width=89) + Conds:RS_149._col0=RS_137._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] + 
PartitionCols:_col0 + Select Operator [SEL_136] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_135] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_147] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + Group By Operator [GBY_141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_138] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_136] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + PartitionCols:_col0 + Select Operator [SEL_165] (rows=17 width=115) + Output:["_col0","_col1"] + Group By Operator [GBY_164] (rows=17 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_163] (rows=184637 width=118) + Output:["_col1","_col2"] + Group By Operator 
[GBY_162] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_131] (rows=91197860 width=89) + Conds:RS_161._col0=RS_139._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_136] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_159] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_37_store_sales_ss_store_sk_min) AND DynamicValue(RS_37_store_sales_ss_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_37_store_sales_ss_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_14] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + Group By Operator [GBY_142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + 
Select Operator [SEL_140] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_136] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_152] (rows=184637 width=2) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_150] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out new file mode 100644 index 0000000000..1e742bef12 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out @@ -0,0 +1,702 @@ +PREHOOK: query: explain +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as 
jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then 
ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then 
cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = 
d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@warehouse +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + 
,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + 
,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 and 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + union all + (select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when 
d_moy = 9 + then cs_ext_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_ship_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2002 + and t_time between 49530 AND 49530+28800 + and sm_carrier in ('DIAMOND','AIRBORNE') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) + ) x + group by + w_warehouse_name + 
,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@warehouse +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) +Map 25 <- Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 19 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 17 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_267] + Select Operator [SEL_266] (rows=100 width=4614) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] + Limit [LIM_265] (rows=100 width=4510) + Number of rows:100 + Select Operator [SEL_264] (rows=144 width=4510) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_263] + Group By Operator [GBY_262] (rows=144 width=4510) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","s
um(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] vectorized + Reduce Output Operator [RS_281] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_280] (rows=144 width=4510) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Top N Key Operator [TNK_279] (rows=288 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 + Select Operator [SEL_278] (rows=288 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Group By Operator [GBY_277] (rows=144 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_60] (rows=24840 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)"],keys:_col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_202] (rows=11119518 width=3166) + 
Conds:RS_56._col1=RS_243._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col0 + Select Operator [SEL_240] (rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_239] (rows=1 width=88) + predicate:(sm_carrier) IN ('DIAMOND', 'AIRBORNE') + TableScan [TS_18] (rows=1 width=88) + default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_201] (rows=11119518 width=3169) + Conds:RS_53._col0=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_228] (rows=9600 width=4) + Output:["_col0"] + Filter Operator [FIL_227] (rows=9600 width=8) + predicate:t_time BETWEEN 49530 AND 78330 + TableScan [TS_15] (rows=86400 width=8) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=100075658 width=3173) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + Merge Join Operator [MERGEJOIN_200] (rows=100075658 width=757) + 
Conds:RS_43._col3=RS_256._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21","_col22","_col23","_col24","_col25"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] + PartitionCols:_col0 + Select Operator [SEL_254] (rows=27 width=482) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + TableScan [TS_6] (rows=27 width=482) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_199] (rows=100075658 width=283) + Conds:RS_276._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=652 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_215] (rows=652 width=12) + predicate:(d_year = 2002) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_276] + PartitionCols:_col0 + Select Operator [SEL_275] (rows=282272460 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_274] (rows=282272460 width=243) + predicate:((cs_ship_mode_sk BETWEEN DynamicValue(RS_57_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_57_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_57_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND 
DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) and (cs_sold_time_sk BETWEEN DynamicValue(RS_54_time_dim_t_time_sk_min) AND DynamicValue(RS_54_time_dim_t_time_sk_max) and in_bloom_filter(cs_sold_time_sk, DynamicValue(RS_54_time_dim_t_time_sk_bloom_filter))) and cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_32] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_269] + Group By Operator [GBY_268] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + Group By Operator [GBY_222] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_220] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_271] + Group By Operator [GBY_270] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_232] (rows=9600 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 24 [BROADCAST_EDGE] vectorized + 
BROADCAST [RS_273] + Group By Operator [GBY_272] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] + Group By Operator [GBY_246] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_244] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_240] + <-Reducer 6 [CONTAINS] vectorized + Reduce Output Operator [RS_261] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_260] (rows=144 width=4510) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Top N Key Operator [TNK_259] (rows=288 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 + Select Operator [SEL_258] (rows=288 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Group By Operator [GBY_257] (rows=144 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_28] (rows=12780 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)"],keys:_col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_198] (rows=5707369 width=3166) + Conds:RS_24._col1=RS_241._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_240] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_197] (rows=5707369 width=3169) + Conds:RS_21._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_228] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=51366312 width=3173) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31"] + Merge Join Operator [MERGEJOIN_196] (rows=51366312 width=757) + Conds:RS_11._col3=RS_255._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21","_col22","_col23","_col24","_col25"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_254] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_195] (rows=51366312 width=283) + Conds:RS_253._col0=RS_217._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_216] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + PartitionCols:_col0 + Select Operator [SEL_252] (rows=143859154 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_251] (rows=143859154 width=243) + predicate:((ws_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_22_time_dim_t_time_sk_min) AND DynamicValue(RS_22_time_dim_t_time_sk_max) 
and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_22_time_dim_t_time_sk_bloom_filter))) and ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_218] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_230] (rows=9600 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_247] + Group 
By Operator [GBY_245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_242] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_240] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out new file mode 100644 index 0000000000..7fc3337d4b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query67.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: explain +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) 
sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 6 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_108] + Limit [LIM_107] (rows=100 width=617) + Number of rows:100 + Select Operator [SEL_106] (rows=273593580 width=617) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + Select Operator [SEL_104] (rows=273593580 width=617) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_103] (rows=273593580 width=613) + predicate:(rank_window_0 <= 100) + PTF Operator [PTF_102] (rows=820780740 width=613) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col2"}] + Select Operator [SEL_101] (rows=820780740 width=613) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + 
<-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] + PartitionCols:_col2 + Select Operator [SEL_99] (rows=820780740 width=613) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Group By Operator [GBY_98] (rows=820780740 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Group By Operator [GBY_21] (rows=820780740 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, 0L + Merge Join Operator [MERGEJOIN_82] (rows=91197860 width=613) + Conds:RS_84._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=462000 width=393) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_0] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=91197860 width=228) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_81] (rows=91197860 width=228) + Conds:RS_13._col2=RS_97._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_97] + PartitionCols:_col0 + Select Operator [SEL_96] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_8] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + 
<-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=91197860 width=130) + Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] + PartitionCols:_col0 + Select Operator [SEL_86] (rows=317 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_85] (rows=317 width=20) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_5] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_94] (rows=525329897 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_93] (rows=525329897 width=122) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_88] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_86] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out new file mode 100644 index 0000000000..b9d7a889ba --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out @@ -0,0 +1,256 @@ +PREHOOK: query: explain +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + 
,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 8 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_183] + Limit [LIM_182] (rows=100 width=706) + Number of rows:100 + Select Operator [SEL_181] (rows=4418634 width=706) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=4418634 width=706) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_43] (rows=4418634 width=706) + predicate:(_col5 <> _col8) + Merge Join Operator [MERGEJOIN_144] (rows=4418634 width=706) + Conds:RS_40._col0=RS_180._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_139] (rows=80000000 width=277) + Conds:RS_147._col1=RS_149._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=40000000 width=97) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=40000000 width=97) + default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + 
PartitionCols:_col1 + Select Operator [SEL_146] (rows=80000000 width=188) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_145] (rows=80000000 width=188) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=188) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] + PartitionCols:_col1 + Select Operator [SEL_179] (rows=4418634 width=433) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_178] (rows=4418634 width=433) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_33] (rows=4418634 width=433) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col0, _col9, _col2, _col3 + Merge Join Operator [MERGEJOIN_143] (rows=4418634 width=97) + Conds:RS_29._col2=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col9"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_148] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_142] (rows=4418634 width=4) + Conds:RS_26._col1=RS_169._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_169] + PartitionCols:_col0 + Select Operator [SEL_168] (rows=1855 width=4) + Output:["_col0"] + Filter Operator [FIL_167] (rows=1855 width=12) + predicate:((hd_dep_count = 2) or (hd_vehicle_count = 1)) + TableScan [TS_21] (rows=7200 width=12) + 
default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col1 + Select Operator [SEL_20] (rows=17150490 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_141] (rows=17150490 width=4) + Conds:RS_17._col4=RS_161._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=85 width=4) + Output:["_col0"] + Filter Operator [FIL_159] (rows=85 width=97) + predicate:(s_city) IN ('Cedar Grove', 'Wildwood') + TableScan [TS_11] (rows=1704 width=97) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_140] (rows=42598570 width=185) + Conds:RS_177._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_152] (rows=170 width=4) + Output:["_col0"] + Filter Operator [FIL_151] (rows=170 width=12) + predicate:((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) + TableScan [TS_8] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_177] + PartitionCols:_col0 + Select Operator [SEL_176] (rows=457565061 width=343) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_175] (rows=457565061 width=343) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_27_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_27_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, 
DynamicValue(RS_27_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=343) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_154] (rows=170 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_152] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + 
Select Operator [SEL_162] (rows=85 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_160] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_174] + Group By Operator [GBY_173] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_170] (rows=1855 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_168] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out new file mode 100644 index 0000000000..8fe313495f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by 
cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: 
default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_238] + Limit [LIM_237] (rows=1 width=383) + Number of rows:100 + Select Operator [SEL_236] (rows=1 width=383) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + Select Operator [SEL_234] (rows=1 width=383) + Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + Group By Operator [GBY_233] (rows=1 width=367) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group 
By Operator [GBY_67] (rows=1 width=367) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 + Top N Key Operator [TNK_104] (rows=1 width=363) + keys:_col6, _col7, _col8, _col9, _col10,sort order:+++++,top n:100 + Select Operator [SEL_66] (rows=1 width=363) + Output:["_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_65] (rows=1 width=363) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_183] (rows=1 width=363) + Conds:RS_62._col0=RS_232._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_47] (rows=1 width=367) + Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_46] (rows=1 width=367) + predicate:_col12 is null + Merge Join Operator [MERGEJOIN_182] (rows=33 width=367) + Conds:RS_43._col0=RS_222._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_43] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_181] (rows=6841 width=363) + Conds:RS_40._col0=RS_41._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_177] (rows=4605476 width=363) + Conds:RS_35._col1=RS_191._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=1861800 width=363) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_6] (rows=1861800 width=363) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_35] + 
PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=4541258 width=5) + Conds:RS_186._col2=RS_189._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col2 + Select Operator [SEL_185] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_184] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Select Operator [SEL_188] (rows=2352941 width=90) + Output:["_col0"] + Filter Operator [FIL_187] (rows=2352941 width=90) + predicate:(ca_state) IN ('CO', 'IL', 'MN') + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=116289 width=1) + Output:["_col0"],keys:_col0 + Select Operator [SEL_17] (rows=43153353 width=1) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_178] (rows=43153353 width=1) + Conds:RS_212._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=150 width=4) + Output:["_col0"] + Filter Operator [FIL_192] (rows=150 width=12) + predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3) + TableScan [TS_11] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + PartitionCols:_col0 + Select Operator [SEL_211] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_210] (rows=525327388 width=7) + predicate:((ss_customer_sk BETWEEN 
DynamicValue(RS_40_c_c_customer_sk_min) AND DynamicValue(RS_40_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_40_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_8] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_209] + Group By Operator [GBY_208] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4291485)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4291485)"] + Select Operator [SEL_135] (rows=4605476 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_177] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_207] + Group By Operator [GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_203] + Group By Operator [GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_222] + PartitionCols:_col0 + Select Operator [SEL_221] (rows=116289 width=7) + Output:["_col0","_col1"] + 
Group By Operator [GBY_220] (rows=116289 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=116289 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_179] (rows=11823304 width=3) + Conds:RS_219._col0=RS_196._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_196] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_193] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_219] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_217] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_43_c_c_customer_sk_min) AND DynamicValue(RS_43_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_43_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_18] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_150] (rows=6841 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_181] + <-Reducer 20 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_204] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_197] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_232] + PartitionCols:_col0 + Select Operator [SEL_231] (rows=115467 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_230] (rows=115467 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Group By Operator [GBY_58] (rows=115467 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_180] (rows=23255411 width=3) + Conds:RS_229._col0=RS_198._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_193] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + PartitionCols:_col0 + Select Operator [SEL_228] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_227] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_62_c_c_customer_sk_min) AND DynamicValue(RS_62_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_62_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan 
[TS_48] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_167] + Group By Operator [GBY_166] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_165] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_47] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out new file mode 100644 index 0000000000..44174604b0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out @@ -0,0 +1,173 @@ +PREHOOK: query: explain +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = 
cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_129] + Limit [LIM_128] (rows=100 width=444) + Number of rows:100 + Select Operator [SEL_127] (rows=310774 width=444) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=310774 width=444) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_124] (rows=310774 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col5)","count(_col5)","sum(_col4)","count(_col4)"],keys:_col8 + Top N Key Operator [TNK_55] (rows=1441769 width=100) + keys:_col8,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_99] (rows=1441769 width=100) + Conds:RS_24._col0=RS_123._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=462000 width=104) + Output:["_col0","_col1"] + 
TableScan [TS_19] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_98] (rows=1441769 width=4) + Conds:RS_21._col1=RS_121._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0 + Select Operator [SEL_120] (rows=2300 width=4) + Output:["_col0"] + Filter Operator [FIL_119] (rows=2300 width=174) + predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) + TableScan [TS_16] (rows=2300 width=174) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_15] (rows=1441769 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_97] (rows=1441769 width=4) + Conds:RS_12._col0=RS_110._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_108] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_96] (rows=4037893 width=4) + Conds:RS_118._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=14776 width=4) + Output:["_col0"] + Filter Operator [FIL_100] (rows=14776 width=268) + predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W')) + TableScan [TS_3] 
(rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + PartitionCols:_col2 + Select Operator [SEL_117] (rows=501686735 width=340) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_116] (rows=501686735 width=340) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_10_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_10_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_10_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=340) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_111] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_103] (rows=14776 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_101] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out new file mode 100644 index 0000000000..184d281df0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: explain +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: 
explain +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_168] + Limit [LIM_167] (rows=100 width=492) + Number of rows:100 + Select Operator [SEL_166] (rows=240 width=492) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_165] + Select Operator [SEL_164] (rows=240 width=492) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_163] (rows=240 width=304) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] + Select Operator [SEL_162] (rows=240 width=304) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_161] + PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END + Select Operator [SEL_160] (rows=240 width=304) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_159] (rows=240 width=304) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_48] (rows=18000 width=304) + 
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L + Select Operator [SEL_46] (rows=29778893 width=207) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_134] (rows=29778893 width=207) + Conds:RS_43._col7=RS_158._col0(Inner),Output:["_col2","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + PartitionCols:_col0 + Select Operator [SEL_157] (rows=16 width=86) + Output:["_col0"] + Filter Operator [FIL_156] (rows=16 width=198) + predicate:(rank_window_0 <= 5) + PTF Operator [PTF_155] (rows=49 width=198) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] + Select Operator [SEL_154] (rows=49 width=198) + Output:["_col0","_col1"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Group By Operator [GBY_152] (rows=49 width=198) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=1704 width=198) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_133] (rows=91197860 width=168) + Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_130] (rows=91197860 width=85) + Conds:RS_145._col0=RS_137._col0(Inner),Output:["_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=317 width=8) + Output:["_col0"] + Filter Operator [FIL_135] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + PartitionCols:_col0 + Select Operator [SEL_144] (rows=525329897 width=114) + 
Output:["_col0","_col1","_col2"] + Filter Operator [FIL_143] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_142] + Group By Operator [GBY_141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + Group By Operator [GBY_139] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_138] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_136] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=1704 width=90) + predicate:s_state is not null + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_131] (rows=91197860 width=266) + Conds:RS_40._col1=RS_148._col0(Inner),Output:["_col2","_col6","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_130] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=1704 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator 
[FIL_146] (rows=1704 width=188) + predicate:s_state is not null + TableScan [TS_6] (rows=1704 width=188) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out new file mode 100644 index 0000000000..81f7961df9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: explain +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from 
item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 10 <- Reducer 13 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 18 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) +Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_188] + Select Operator [SEL_187] (rows=1991967 width=223) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + Select Operator [SEL_185] (rows=1991967 width=227) + Output:["_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_184] (rows=1991967 width=223) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_45] (rows=1991967 width=223) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4, _col7, _col8, _col5 + Merge Join Operator [MERGEJOIN_140] (rows=1991967 width=112) + Conds:RS_41._col2=RS_173._col0(Inner),Output:["_col0","_col4","_col5","_col7","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_173] + 
PartitionCols:_col0 + Select Operator [SEL_172] (rows=43200 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_171] (rows=43200 width=99) + predicate:(t_meal_time) IN ('breakfast', 'dinner') + TableScan [TS_35] (rows=86400 width=99) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_139] (rows=3983933 width=104) + Conds:Union 3._col1=RS_163._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Select Operator [SEL_162] (rows=7333 width=107) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_161] (rows=7333 width=111) + predicate:(i_manager_id = 1) + TableScan [TS_32] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + <-Union 3 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] + Reduce Output Operator [RS_148] + PartitionCols:_col1 + Select Operator [SEL_146] (rows=7751851 width=98) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_145] (rows=7751851 width=98) + Conds:RS_199._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_189] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 2001)) + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] + PartitionCols:_col0 + Select Operator [SEL_198] (rows=285116947 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_197] (rows=285116947 width=123) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND 
DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) and (cs_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(cs_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and cs_sold_date_sk is not null and cs_sold_time_sk is not null) + TableScan [TS_10] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_169] + Group By Operator [GBY_167] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_164] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_162] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_179] + Group By Operator [GBY_177] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] + Group By Operator [GBY_175] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_174] (rows=43200 width=4) + Output:["_col0"] + Please refer to the previous Select 
Operator [SEL_172] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_196] + Group By Operator [GBY_195] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + Group By Operator [GBY_193] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_192] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 15 [CONTAINS] + Reduce Output Operator [RS_152] + PartitionCols:_col1 + Select Operator [SEL_150] (rows=14384397 width=4) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_149] (rows=14384397 width=4) + Conds:RS_210._col0=RS_202._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + PartitionCols:_col0 + Select Operator [SEL_201] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_200] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 2001)) + TableScan [TS_24] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_210] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=525325345 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_208] (rows=525325345 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN 
DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_sold_time_sk is not null) + TableScan [TS_21] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_170] + Please refer to the previous Group By Operator [GBY_167] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_180] + Please refer to the previous Group By Operator [GBY_177] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_207] + Group By Operator [GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_203] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_201] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_144] + PartitionCols:_col1 + Select Operator [SEL_142] (rows=3941098 width=118) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_141] (rows=3941098 width=118) + Conds:RS_183._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_155] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_153] (rows=50 width=12) + predicate:((d_moy = 12) and (d_year = 2001)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col0 + Select Operator [SEL_182] (rows=143930836 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_181] (rows=143930836 width=123) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_sold_time_sk is not null) + TableScan [TS_0] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_168] + Please refer to the previous Group By Operator [GBY_167] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_178] + Please refer to the previous Group By Operator [GBY_177] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_156] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_154] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out new file mode 100644 index 0000000000..13aee807cf --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: explain +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_desc + 
,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 15 <- Reducer 17 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 25 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 23 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 13 vectorized + File Output Operator [FS_298] + Limit [LIM_297] (rows=100 width=312) + Number of rows:100 + Select Operator [SEL_296] (rows=3148061 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + Group By Operator [GBY_294] (rows=3148061 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_65] (rows=558148039 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col5)","count(_col6)","count()"],keys:_col3, _col2, _col4 + Select Operator [SEL_63] (rows=558148039 width=292) + Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_251] (rows=558148039 width=292) + Conds:RS_60._col0, _col2=RS_293._col0, 
_col1(Left Outer),Output:["_col3","_col4","_col5","_col6"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0, _col1 + Select Operator [SEL_292] (rows=28798881 width=8) + Output:["_col0","_col1"] + TableScan [TS_55] (rows=28798881 width=8) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0, _col2 + Merge Join Operator [MERGEJOIN_250] (rows=216421239 width=300) + Conds:RS_57._col1=RS_291._col0(Left Outer),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + PartitionCols:_col0 + Select Operator [SEL_290] (rows=2300 width=4) + Output:["_col0"] + TableScan [TS_53] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=216421239 width=299) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_51] (rows=216421239 width=315) + predicate:(_col12 > _col8) + Merge Join Operator [MERGEJOIN_249] (rows=649263719 width=315) + Conds:RS_48._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col12"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=73049 width=12) + Output:["_col0","_col1"] + TableScan [TS_43] (rows=73049 width=98) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_248] (rows=649263719 width=311) + Conds:RS_45._col4, _col7=RS_287._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + PartitionCols:_col0, _col1 + Select Operator [SEL_286] (rows=73049 width=8) + 
Output:["_col0","_col1"] + Filter Operator [FIL_285] (rows=73049 width=8) + predicate:d_week_seq is not null + TableScan [TS_40] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col4, _col7 + Select Operator [SEL_39] (rows=580674579 width=315) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_247] (rows=580674579 width=315) + Conds:RS_36._col2=RS_274._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_274] + PartitionCols:_col0 + Select Operator [SEL_273] (rows=1440 width=4) + Output:["_col0"] + Filter Operator [FIL_272] (rows=1440 width=96) + predicate:(hd_buy_potential = '1001-5000') + TableScan [TS_30] (rows=7200 width=96) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_246] (rows=2903372850 width=319) + Conds:RS_33._col0=RS_266._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_266] + PartitionCols:_col0 + Select Operator [SEL_265] (rows=652 width=16) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_264] (rows=652 width=106) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_27] (rows=73049 width=106) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=8189226191 width=311) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_245] (rows=8189226191 width=311) + 
Conds:RS_23._col4=RS_284._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=462000 width=188) + Output:["_col0","_col1"] + TableScan [TS_18] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_244] (rows=8189226191 width=127) + Conds:RS_20._col2=RS_258._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] + PartitionCols:_col0 + Select Operator [SEL_257] (rows=265971 width=4) + Output:["_col0"] + Filter Operator [FIL_256] (rows=265971 width=89) + predicate:(cd_marital_status = 'M') + TableScan [TS_15] (rows=1861800 width=89) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col2 + Select Operator [SEL_14] (rows=56525338557 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_13] (rows=56525338557 width=139) + predicate:(_col3 < _col13) + Merge Join Operator [MERGEJOIN_243] (rows=169576015673 width=139) + Conds:RS_10._col1=RS_282._col4(Inner),Output:["_col0","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] + PartitionCols:_col4 + Select Operator [SEL_281] (rows=282274763 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_280] (rows=282274763 width=31) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_21_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_21_customer_demographics_cd_demo_sk_max) and 
in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_21_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_37_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_37_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_37_household_demographics_hd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_34_d1_d_date_sk_min) AND DynamicValue(RS_34_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_4] (rows=287989836 width=31) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_263] + Group By Operator [GBY_262] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_259] (rows=265971 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_257] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_271] + Group By Operator [GBY_270] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_269] + Group By Operator [GBY_268] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_267] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_265] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_279] + Group By Operator [GBY_278] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_277] + Group By Operator [GBY_276] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_275] (rows=1440 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_273] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=37584000 width=111) + Conds:RS_253._col2=RS_255._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + PartitionCols:_col2 + Select Operator [SEL_252] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + PartitionCols:_col0 + Select Operator [SEL_254] (rows=27 width=104) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out new file mode 100644 index 0000000000..62df36ba68 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query73.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and 
household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_135] + Select Operator [SEL_134] (rows=59862 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_35] + Select Operator [SEL_34] (rows=59862 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_100] (rows=59862 width=364) + Conds:RS_102._col0=RS_133._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=80000000 width=356) + 
Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_0] (rows=80000000 width=356) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + PartitionCols:_col1 + Filter Operator [FIL_132] (rows=59862 width=12) + predicate:_col2 BETWEEN 1 AND 5 + Select Operator [SEL_131] (rows=1197233 width=12) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_130] (rows=1197233 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0, _col1 + Group By Operator [GBY_25] (rows=1197233 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col2 + Merge Join Operator [MERGEJOIN_99] (rows=1197233 width=4) + Conds:RS_21._col1=RS_121._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0 + Select Operator [SEL_120] (rows=480 width=4) + Output:["_col0"] + Filter Operator [FIL_119] (rows=480 width=104) + predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END) + TableScan [TS_18] (rows=7200 width=104) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=17958489 width=4) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_98] (rows=17958489 width=4) + Conds:RS_14._col3=RS_113._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=85 width=4) + 
Output:["_col0"] + Filter Operator [FIL_111] (rows=85 width=102) + predicate:(s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') + TableScan [TS_8] (rows=1704 width=102) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_97] (rows=44605486 width=10) + Conds:RS_129._col0=RS_105._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=170 width=4) + Output:["_col0"] + Filter Operator [FIL_103] (rows=170 width=12) + predicate:((d_year) IN (2000, 2001, 2002) and d_dom BETWEEN 1 AND 2) + TableScan [TS_5] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=479121995 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_127] (rows=479121995 width=19) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_22_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_22_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_22_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=19) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_106] (rows=170 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_104] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=85 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_112] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_122] (rows=480 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_120] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out new file mode 100644 index 0000000000..5f41966a79 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out @@ -0,0 +1,399 @@ +PREHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when 
t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 
null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 24 <- Reducer 18 (BROADCAST_EDGE) +Map 25 <- Reducer 22 (BROADCAST_EDGE) +Map 26 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 23 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (ONE_TO_ONE_EDGE) +Reducer 13 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 24 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 15 <- Map 23 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 18 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 25 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 26 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_347] + Limit [LIM_346] (rows=100 width=280) + Number of rows:100 + Select Operator [SEL_345] (rows=12248093 width=280) + Output:["_col0","_col1","_col2"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_90] + Select Operator [SEL_89] (rows=12248093 width=280) + Output:["_col0","_col1","_col2"] + 
Filter Operator [FIL_88] (rows=12248093 width=628) + predicate:CASE WHEN (_col7) THEN (CASE WHEN (_col8) THEN (((_col1 / _col6) > _col9)) ELSE (_col10) END) ELSE (CASE WHEN (_col8) THEN (((_col1 / _col6) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_282] (rows=24496187 width=628) + Conds:RS_317._col0=RS_86._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 12 [ONE_TO_ONE_EDGE] + FORWARD [RS_86] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=20485012 width=616) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_281] (rows=20485012 width=724) + Conds:RS_81._col0=RS_324._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:_col0 + Group By Operator [GBY_323] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_74] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_279] (rows=187573258 width=377) + Conds:RS_70._col1=RS_312._col0(Inner),Output:["_col2","_col5","_col6","_col7"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=80000000 width=284) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_6] (rows=80000000 width=284) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_278] (rows=187573258 width=101) + Conds:RS_322._col0=RS_291._col0(Inner),Output:["_col1","_col2"] + <-Map 7 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + PartitionCols:_col0 + Select Operator [SEL_286] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_283] (rows=652 width=8) + predicate:((d_year = 2002) and (d_year) IN (2001, 2002)) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Select Operator [SEL_321] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_320] (rows=525327388 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_59] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] + Group By Operator [GBY_298] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_292] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_286] + <-Reducer 17 [ONE_TO_ONE_EDGE] + FORWARD [RS_81] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_280] (rows=17130654 width=332) + Conds:RS_334._col0=RS_344._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_333] (rows=26666666 
width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_332] (rows=26666666 width=212) + predicate:(_col3 > 0) + Select Operator [SEL_331] (rows=80000000 width=212) + Output:["_col0","_col3"] + Group By Operator [GBY_330] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_34] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=377) + Conds:RS_30._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_313] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=101) + Conds:RS_329._col0=RS_293._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_284] (rows=652 width=8) + predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) + Please refer to the previous TableScan [TS_3] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] + PartitionCols:_col0 + Select Operator [SEL_328] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_327] (rows=525327388 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_19] (rows=575995635 width=114) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_326] + Group By Operator [GBY_325] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] + Group By Operator [GBY_299] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_294] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] + PartitionCols:_col0 + Select Operator [SEL_343] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_342] (rows=17130654 width=212) + predicate:(_col3 > 0) + Select Operator [SEL_341] (rows=51391963 width=212) + Output:["_col0","_col3"] + Group By Operator [GBY_340] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_54] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=391) + Conds:RS_50._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115) + Conds:RS_339._col0=RS_295._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + 
PartitionCols:_col0 + Select Operator [SEL_288] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_285] (rows=652 width=8) + predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) + Please refer to the previous TableScan [TS_3] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] + PartitionCols:_col0 + Select Operator [SEL_338] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_337] (rows=143930993 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_39] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_296] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_288] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_317] + PartitionCols:_col0 + Select Operator [SEL_316] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_315] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=51391963 width=392) + 
Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_273] (rows=51391963 width=391) + Conds:RS_11._col1=RS_311._col0(Inner),Output:["_col2","_col5","_col6","_col7"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_272] (rows=51391963 width=115) + Conds:RS_309._col0=RS_289._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_286] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0 + Select Operator [SEL_308] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_307] (rows=143930993 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + Group By Operator [GBY_297] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_290] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_286] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out new file mode 100644 index 0000000000..716ede9c40 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out @@ -0,0 +1,674 @@ +PREHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + 
,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND 
ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 14 <- Reducer 18 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE) +Map 46 <- Reducer 22 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE) +Map 48 <- Reducer 26 (BROADCAST_EDGE), Reducer 41 (BROADCAST_EDGE) +Map 49 <- Reducer 29 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE) +Map 50 <- Reducer 33 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE) +Map 51 <- Reducer 37 (BROADCAST_EDGE), Reducer 44 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Map 38 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 20 <- Map 38 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 45 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 17 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) +Reducer 24 <- Map 38 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 47 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 26 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 17 (SIMPLE_EDGE), Map 49 (SIMPLE_EDGE) +Reducer 28 <- Map 38 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 17 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE) +Reducer 31 <- Map 38 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 45 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 33 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 17 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 35 <- Map 38 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Map 47 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 37 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Union 3 
(SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 40 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 41 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 43 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 38 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE), Union 10 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_651] + Select Operator [SEL_650] (rows=100 width=160) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Limit [LIM_649] (rows=100 width=152) + Number of rows:100 + Select Operator [SEL_648] (rows=3422897230256 width=151) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_175] + Select Operator [SEL_174] (rows=3422897230256 width=151) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_173] (rows=3422897230256 width=479) + predicate:((_col13 / _col6) < 0.9) + Merge Join Operator [MERGEJOIN_530] (rows=10268691690770 width=479) + Conds:RS_643._col0, _col1, _col2, _col3=RS_647._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col11","_col12","_col13"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_647] + PartitionCols:_col0, _col1, _col2, _col3 + Select Operator [SEL_646] (rows=84235776 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_645] (rows=84235776 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 + Group By Operator [GBY_644] (rows=736356923 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, 
KEY._col3, KEY._col4, KEY._col5 + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_661] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_660] (rows=736356923 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_659] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Union 10 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] + Reduce Output Operator [RS_563] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_562] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_560] (rows=450703984 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_559] (rows=450703984 width=204) + Conds:RS_671._col0, _col1=RS_126._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 45 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_671] + PartitionCols:_col0, _col1 + Select Operator [SEL_669] (rows=57591150 width=119) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_22] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_126] + PartitionCols:_col0, _col1 + Select Operator [SEL_124] (rows=187186493 width=124) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_525] (rows=187186493 width=124) + Conds:RS_121._col1=RS_616._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_616] + PartitionCols:_col0 + Select 
Operator [SEL_607] (rows=45745 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_606] (rows=45745 width=109) + predicate:((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_manufact_id is not null) + TableScan [TS_8] (rows=462000 width=109) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id","i_category","i_manufact_id"] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_121] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_524] (rows=196410188 width=109) + Conds:RS_688._col0=RS_588._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_588] + PartitionCols:_col0 + Select Operator [SEL_578] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_574] (rows=652 width=8) + predicate:(d_year = 2002) + TableScan [TS_5] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_688] + PartitionCols:_col0 + Select Operator [SEL_687] (rows=550076554 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_686] (rows=550076554 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_122_item_i_item_sk_min) AND DynamicValue(RS_122_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_122_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_119_date_dim_d_date_sk_min) AND DynamicValue(RS_119_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_119_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_109] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_683] 
+ Group By Operator [GBY_682] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_602] + Group By Operator [GBY_596] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_589] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_578] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_685] + Group By Operator [GBY_684] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_630] + Group By Operator [GBY_624] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_617] (rows=45745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 9 [CONTAINS] + Reduce Output Operator [RS_544] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_543] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_541] (rows=170474971 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_540] (rows=170474971 width=234) + Conds:RS_571._col0, _col1=RS_104._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_571] + PartitionCols:_col0, _col1 + Select Operator [SEL_569] (rows=28798881 width=121) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=28798881 
width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col0, _col1 + Select Operator [SEL_102] (rows=96821196 width=138) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_522] (rows=96821196 width=138) + Conds:RS_99._col1=RS_614._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_614] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_521] (rows=101592102 width=122) + Conds:RS_658._col0=RS_586._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_586] + PartitionCols:_col0 + Select Operator [SEL_577] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_573] (rows=652 width=8) + predicate:(d_year = 2002) + Please refer to the previous TableScan [TS_5] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_658] + PartitionCols:_col0 + Select Operator [SEL_657] (rows=286549727 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_656] (rows=286549727 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_100_item_i_item_sk_min) AND DynamicValue(RS_100_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_100_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_87] (rows=287989836 width=127) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_653] + Group By Operator [GBY_652] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_601] + Group By Operator [GBY_595] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_587] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_577] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_655] + Group By Operator [GBY_654] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_629] + Group By Operator [GBY_623] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_615] (rows=45745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 36 [CONTAINS] + Reduce Output Operator [RS_568] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_567] (rows=736356923 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_565] (rows=115177968 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_564] (rows=115177968 width=220) + Conds:RS_681._col0, _col1=RS_155._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 47 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_681] + PartitionCols:_col0, _col1 + Select Operator [SEL_679] (rows=14398467 width=118) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_51] (rows=14398467 width=118) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_155] + PartitionCols:_col0, _col1 + Select Operator [SEL_153] (rows=48990732 width=139) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_528] (rows=48990732 width=139) + Conds:RS_150._col1=RS_618._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_618] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_150] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_527] (rows=51404771 width=123) + Conds:RS_695._col0=RS_590._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_590] + PartitionCols:_col0 + Select Operator [SEL_579] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_575] (rows=652 width=8) + predicate:(d_year = 2002) + Please refer to the previous TableScan [TS_5] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_695] + PartitionCols:_col0 + Select Operator [SEL_694] (rows=143966864 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_693] (rows=143966864 width=127) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_151_item_i_item_sk_min) AND DynamicValue(RS_151_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_151_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_148_date_dim_d_date_sk_min) AND DynamicValue(RS_148_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_148_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_138] (rows=144002668 width=127) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_690] + Group By Operator [GBY_689] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_603] + Group By Operator [GBY_597] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_591] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_579] + <-Reducer 44 [BROADCAST_EDGE] vectorized + BROADCAST [RS_692] + Group By Operator [GBY_691] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_631] + Group By Operator [GBY_625] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_619] (rows=45745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_643] + PartitionCols:_col0, _col1, _col2, _col3 + Select Operator [SEL_642] (rows=84235776 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_641] (rows=84235776 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 + Group By Operator [GBY_640] (rows=736356923 
width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] + Reduce Output Operator [RS_558] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_557] (rows=736356923 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_555] (rows=115177968 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_554] (rows=115177968 width=220) + Conds:RS_680._col0, _col1=RS_70._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_680] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_679] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0, _col1 + Select Operator [SEL_68] (rows=48990732 width=139) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_519] (rows=48990732 width=139) + Conds:RS_65._col1=RS_612._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_612] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_518] (rows=51404771 width=123) + Conds:RS_678._col0=RS_584._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_584] + PartitionCols:_col0 + Select Operator [SEL_576] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_572] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_5] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_678] + PartitionCols:_col0 + Select 
Operator [SEL_677] (rows=143966864 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_676] (rows=143966864 width=127) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_66_item_i_item_sk_min) AND DynamicValue(RS_66_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_66_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_63_date_dim_d_date_sk_min) AND DynamicValue(RS_63_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_63_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_53] (rows=144002668 width=127) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_673] + Group By Operator [GBY_672] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_600] + Group By Operator [GBY_594] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_585] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_576] + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_675] + Group By Operator [GBY_674] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_628] + Group By Operator [GBY_622] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_613] (rows=45745 width=4) + Output:["_col0"] + Please refer to 
the previous Select Operator [SEL_607] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_639] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_638] (rows=736356923 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_637] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Union 3 [SIMPLE_EDGE] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_535] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_534] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_532] (rows=170474971 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_531] (rows=170474971 width=234) + Conds:RS_570._col0, _col1=RS_19._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_570] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_569] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0, _col1 + Select Operator [SEL_17] (rows=96821196 width=138) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_513] (rows=96821196 width=138) + Conds:RS_14._col1=RS_608._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_608] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_512] (rows=101592102 width=122) + 
Conds:RS_636._col0=RS_580._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_580] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_576] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_636] + PartitionCols:_col0 + Select Operator [SEL_635] (rows=286549727 width=127) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_634] (rows=286549727 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_15_item_i_item_sk_min) AND DynamicValue(RS_15_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_15_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_2] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_605] + Group By Operator [GBY_604] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_598] + Group By Operator [GBY_592] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_581] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_576] + <-Reducer 39 [BROADCAST_EDGE] vectorized + BROADCAST [RS_633] + Group By Operator [GBY_632] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_626] + Group By Operator [GBY_620] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_609] (rows=45745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_553] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_552] (rows=621178955 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 + Select Operator [SEL_550] (rows=450703984 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_549] (rows=450703984 width=204) + Conds:RS_670._col0, _col1=RS_41._col0, _col1(Right Outer),Output:["_col2","_col3","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 45 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_670] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_669] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0, _col1 + Select Operator [SEL_39] (rows=187186493 width=124) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_516] (rows=187186493 width=124) + Conds:RS_36._col1=RS_610._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_610] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_515] (rows=196410188 width=109) + Conds:RS_668._col0=RS_582._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_582] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_576] + 
<-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_668] + PartitionCols:_col0 + Select Operator [SEL_667] (rows=550076554 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_666] (rows=550076554 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_37_item_i_item_sk_min) AND DynamicValue(RS_37_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_37_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_24] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_663] + Group By Operator [GBY_662] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_599] + Group By Operator [GBY_593] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_583] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_576] + <-Reducer 40 [BROADCAST_EDGE] vectorized + BROADCAST [RS_665] + Group By Operator [GBY_664] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_627] + Group By Operator [GBY_621] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_611] (rows=45745 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out new file mode 100644 index 0000000000..56d4500fa1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out @@ -0,0 +1,197 @@ +PREHOOK: query: explain +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, 
date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Map 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_175] + Limit [LIM_174] (rows=100 width=408) + Number of rows:100 + Select Operator [SEL_173] (rows=5600 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=5600 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Union 4 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] + Reduce Output Operator [RS_161] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_160] (rows=224000 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 + Top N Key Operator [TNK_159] (rows=26219002 width=388) + keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top n:100 + Select Operator [SEL_157] (rows=1433911 width=399) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_156] (rows=1433911 width=209) + Conds:RS_39._col0=RS_185._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=73049 width=12) + Output:["_col0","_col1","_col2"] + TableScan [TS_34] (rows=73049 
width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_142] (rows=1433911 width=205) + Conds:RS_183._col1=RS_165._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_165] + PartitionCols:_col0 + Select Operator [SEL_162] (rows=462000 width=94) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=94) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col1 + Select Operator [SEL_182] (rows=1433911 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=1433911 width=123) + predicate:(cs_sold_date_sk is not null and cs_warehouse_sk is null) + TableScan [TS_29] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 3 [CONTAINS] + Reduce Output Operator [RS_149] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_148] (rows=224000 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 + Top N Key Operator [TNK_147] (rows=26219002 width=388) + keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top n:100 + Select Operator [SEL_145] (rows=24749363 width=387) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_144] (rows=24749363 width=204) + Conds:RS_10._col2=RS_170._col0(Inner),Output:["_col1","_col4","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + PartitionCols:_col0 + Select Operator [SEL_169] (rows=73049 width=12) + Output:["_col0","_col1","_col2"] + TableScan [TS_5] (rows=73049 width=12) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_138] (rows=24749363 width=200) + Conds:RS_163._col0=RS_168._col1(Inner),Output:["_col1","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_162] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] + PartitionCols:_col1 + Select Operator [SEL_167] (rows=24749363 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_166] (rows=24749363 width=118) + predicate:(ss_addr_sk is null and ss_sold_date_sk is not null) + TableScan [TS_2] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 8 [CONTAINS] + Reduce Output Operator [RS_155] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_154] (rows=224000 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 + Top N Key Operator [TNK_153] (rows=26219002 width=388) + keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top n:100 + Select Operator [SEL_151] (rows=35728 width=394) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_150] (rows=35728 width=209) + Conds:RS_24._col0=RS_180._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] + PartitionCols:_col0 + Select Operator [SEL_179] (rows=73049 width=12) + Output:["_col0","_col1","_col2"] + TableScan [TS_19] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_140] (rows=35728 
width=205) + Conds:RS_178._col1=RS_164._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_162] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_178] + PartitionCols:_col1 + Select Operator [SEL_177] (rows=35728 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_176] (rows=35728 width=123) + predicate:(ws_sold_date_sk is not null and ws_web_page_sk is null) + TableScan [TS_14] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out new file mode 100644 index 0000000000..78f7c28b7a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out @@ -0,0 +1,504 @@ +Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +PREHOOK: query: explain +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as 
date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: 
default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between 
cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 25 <- Reducer 15 (BROADCAST_EDGE) +Map 27 <- Reducer 21 (BROADCAST_EDGE) +Reducer 10 <- Map 24 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 25 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 15 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 26 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 27 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 23 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) +Reducer 21 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 28 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_271] + Limit [LIM_270] (rows=24 width=437) + Number of rows:100 + Select Operator [SEL_269] (rows=24 width=437) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] + Select Operator [SEL_267] (rows=24 width=437) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_266] (rows=24 width=445) + Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] + Reduce Output Operator [RS_230] + PartitionCols:_col0, _col1, _col2 + Group By Operator 
[GBY_229] (rows=58 width=445) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_228] (rows=39 width=435) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_226] (rows=2 width=439) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_225] (rows=2 width=452) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_278] + Group By Operator [GBY_277] (rows=2 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Group By Operator [GBY_44] (rows=56 width=227) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 + Merge Join Operator [MERGEJOIN_212] (rows=31836679 width=222) + Conds:RS_276._col0=RS_242._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_242] + PartitionCols:_col0 + Select Operator [SEL_238] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_237] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_276] + PartitionCols:_col0 + Select Operator [SEL_275] (rows=286549727 width=231) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_274] (rows=286549727 width=231) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan 
[TS_34] (rows=287989836 width=231) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_273] + Group By Operator [GBY_272] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_252] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_243] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_238] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_283] + Group By Operator [GBY_282] (rows=1 width=224) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_59] + Group By Operator [GBY_58] (rows=1 width=224) + Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] + Merge Join Operator [MERGEJOIN_213] (rows=3199657 width=183) + Conds:RS_281._col0=RS_244._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_244] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_238] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_281] + PartitionCols:_col0 + Select Operator [SEL_280] (rows=28798881 width=223) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_279] (rows=28798881 width=223) + predicate:cr_returned_date_sk is not null + TableScan [TS_48] (rows=28798881 width=223) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] + <-Reducer 20 [CONTAINS] + Reduce Output Operator [RS_236] + 
PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_235] (rows=58 width=445) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_234] (rows=39 width=435) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_232] (rows=25 width=435) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_231] (rows=25 width=452) + Conds:RS_290._col0=RS_295._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_290] + PartitionCols:_col0 + Group By Operator [GBY_289] (rows=24 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col0 + Group By Operator [GBY_78] (rows=360 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_76] (rows=15991254 width=227) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_214] (rows=15991254 width=227) + Conds:RS_288._col0=RS_245._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_245] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_238] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_288] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=143931136 width=231) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_286] (rows=143931136 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_67] (rows=144002668 width=231) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_285] + Group By Operator [GBY_284] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_253] + Group By Operator [GBY_250] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_246] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_238] + <-Reducer 23 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_295] + PartitionCols:_col0 + Group By Operator [GBY_294] (rows=24 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Group By Operator [GBY_93] (rows=24 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_91] (rows=1458758 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_215] (rows=1458758 width=135) + Conds:RS_293._col0=RS_247._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_247] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_238] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0 + Select Operator [SEL_292] (rows=13129719 width=221) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_291] (rows=13129719 width=221) + predicate:(wr_returned_date_sk is not null and wr_web_page_sk is not null) + TableScan [TS_82] (rows=14398467 width=221) + 
default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] + <-Reducer 4 [CONTAINS] + Reduce Output Operator [RS_224] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_223] (rows=58 width=445) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_222] (rows=39 width=435) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_220] (rows=12 width=436) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_219] (rows=12 width=451) + Conds:RS_260._col0=RS_265._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_265] + PartitionCols:_col0 + Group By Operator [GBY_264] (rows=11 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Group By Operator [GBY_26] (rows=44 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_24] (rows=5959021 width=156) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_211] (rows=5959021 width=156) + Conds:RS_263._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_241] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_238] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_263] + PartitionCols:_col0 + Select Operator [SEL_262] (rows=53634860 width=223) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_261] (rows=53634860 width=223) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) + TableScan [TS_15] (rows=57591150 width=223) + 
default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] + <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_260] + PartitionCols:_col0 + Group By Operator [GBY_259] (rows=11 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=341 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_9] (rows=58365993 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_210] (rows=58365993 width=135) + Conds:RS_258._col0=RS_239._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_239] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_238] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] + PartitionCols:_col0 + Select Operator [SEL_257] (rows=525329897 width=221) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_256] (rows=525329897 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_255] + Group By Operator [GBY_254] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_251] + Group By Operator [GBY_248] 
(rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_240] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_238] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out new file mode 100644 index 0000000000..ef4d6dbc21 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out @@ -0,0 +1,345 @@ +PREHOOK: query: explain +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, 
ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + 
sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=cs_item_sk and cs_customer_sk=ss_customer_sk) +where coalesce(ws_qty,0)>0 and coalesce(cs_qty, 0)>0 and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 14 <- Reducer 7 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Map 20 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_238] + Limit [LIM_237] (rows=100 width=484) + Number of rows:100 + Select Operator [SEL_236] (rows=462576000046 width=483) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_73] + Select Operator [SEL_72] (rows=462576000046 width=719) + Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_71] (rows=462576000046 width=702) + predicate:CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END + Merge Join Operator [MERGEJOIN_191] (rows=925152000093 width=702) + Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + PartitionCols:_col0 + Select Operator [SEL_234] (rows=101592102 width=235) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_233] (rows=101592102 
width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1 + Group By Operator [GBY_64] (rows=101592102 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col2, _col3 + Merge Join Operator [MERGEJOIN_189] (rows=101592102 width=233) + Conds:RS_198._col0=RS_61._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_192] (rows=652 width=8) + predicate:(d_year = 2000) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Select Operator [SEL_59] (rows=286549727 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_58] (rows=286549727 width=240) + predicate:_col8 is null + Merge Join Operator [MERGEJOIN_188] (rows=468719906 width=240) + Conds:RS_230._col2, _col3=RS_232._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + PartitionCols:_col2, _col3 + Select Operator [SEL_229] (rows=286549727 width=242) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_228] (rows=286549727 width=242) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_50] (rows=287989836 width=242) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0, _col1 + Select Operator [SEL_231] (rows=28798881 width=8) + Output:["_col0","_col1"] + TableScan [TS_53] (rows=28798881 width=8) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col1 + Filter Operator [FIL_45] (rows=4580227799 width=471) + predicate:CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END + Merge Join Operator [MERGEJOIN_190] (rows=9160455599 width=471) + Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] + <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_215] + PartitionCols:_col1, _col0 + Select Operator [SEL_214] (rows=114214965 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_213] (rows=114214965 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + 
Group By Operator [GBY_17] (rows=114214965 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col3, _col2 + Merge Join Operator [MERGEJOIN_185] (rows=114214965 width=217) + Conds:RS_194._col0=RS_14._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_193] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0 + Select Operator [SEL_12] (rows=319876350 width=233) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_11] (rows=319876350 width=235) + predicate:_col8 is null + Merge Join Operator [MERGEJOIN_184] (rows=883006376 width=235) + Conds:RS_210._col1, _col3=RS_212._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_210] + PartitionCols:_col1, _col3 + Select Operator [SEL_209] (rows=550076554 width=233) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_208] (rows=550076554 width=233) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_3] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_207] + Group By Operator [GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + Group By Operator 
[GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + PartitionCols:_col0, _col1 + Select Operator [SEL_211] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_225] + PartitionCols:_col1, _col0 + Select Operator [SEL_224] (rows=40539971 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_223] (rows=40539971 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1 + Group By Operator [GBY_38] (rows=40539971 width=239) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)"],keys:_col3, _col2 + Merge Join Operator [MERGEJOIN_187] (rows=40539971 width=235) + Conds:RS_196._col0=RS_35._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_196] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_193] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=113538342 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_32] (rows=113538342 width=242) + predicate:_col8 is null + Merge Join Operator [MERGEJOIN_186] (rows=254679677 width=242) + Conds:RS_220._col1, _col3=RS_222._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + <-Map 17 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + PartitionCols:_col1, _col3 + Select Operator [SEL_219] (rows=143966864 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_218] (rows=143966864 width=243) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_24] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_217] + Group By Operator [GBY_216] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_197] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] + PartitionCols:_col0, _col1 + Select Operator [SEL_221] (rows=14398467 width=8) + Output:["_col0","_col1"] + TableScan [TS_27] (rows=14398467 width=8) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out new file mode 100644 index 0000000000..129270b3c9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain +select 
+ c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) + and date_dim.d_dow = 1 + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=776) + Number of rows:100 + Select Operator [SEL_128] (rows=43530621 width=776) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=43530621 width=776) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_100] (rows=43530621 width=685) + Conds:RS_102._col0=RS_127._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=80000000 width=184) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col1 + Select Operator [SEL_126] (rows=43530621 width=507) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Group By Operator [GBY_125] (rows=43530621 width=325) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_25] (rows=43530621 width=325) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col2, _col3, _col6 + Merge Join Operator [MERGEJOIN_99] (rows=43530621 width=214) + Conds:RS_21._col1=RS_113._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=3055 width=4) + Output:["_col0"] + Filter Operator [FIL_111] (rows=3055 width=12) + predicate:((hd_dep_count = 8) or (hd_vehicle_count > 0)) + TableScan [TS_18] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=102592623 width=283) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_98] (rows=102592623 width=283) + Conds:RS_14._col4=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col10"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=1704 width=97) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=1704 width=100) + predicate:s_number_employees BETWEEN 200 AND 295 + TableScan [TS_8] (rows=1704 width=100) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_number_employees","s_city"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_97] (rows=102592623 width=193) + 
Conds:RS_121._col0=RS_105._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=391 width=4) + Output:["_col0"] + Filter Operator [FIL_103] (rows=391 width=12) + predicate:((d_dow = 1) and (d_year) IN (1998, 1999, 2000)) + TableScan [TS_5] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + PartitionCols:_col0 + Select Operator [SEL_120] (rows=479121995 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_119] (rows=479121995 width=237) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_22_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_22_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_22_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=237) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_106] (rows=391 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_104] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=3055 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_112] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out new file mode 100644 index 0000000000..f8ce436c89 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: explain +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + 
'34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + 
'31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', 
+ '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '11110','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + 
'11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', + '88500','67298','13261','14172','81410', + '93578','83583','46047','94167','82564', + '21156','15799','86709','37931','74703', + '83103','23054','70470','72008','49247', + '91911','69998','20961','70070','63197', + '54853','88191','91830','49521','19454', + '81450','89091','62378','25683','61869', + '51744','36580','85778','36871','48121', + '28810','83712','45486','67393','26935', + '42393','20132','55349','86057','21309', + '80218','10094','11357','48819','39734', + '40758','30432','21204','29467','30214', + '61024','55307','74621','11622','68908', + '33032','52868','99194','99900','84936', + '69036','99149','45013','32895','59004', + '32322','14933','32936','33562','72550', + '27385','58049','58200','16808','21360', + '32961','18586','79307','15492')) + intersect + (select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1))A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2002 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan 
optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_154] + Limit [LIM_153] (rows=1 width=200) + Number of rows:100 + Select Operator [SEL_152] (rows=1 width=200) + Output:["_col0","_col1"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=200) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0 + Group By Operator [GBY_56] (rows=1 width=200) + Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1 + Top N Key Operator [TNK_83] (rows=39957 width=88) + keys:_col1,sort order:+,top n:100 + Merge Join Operator [MERGEJOIN_123] (rows=39957 width=88) + Conds:RS_52._col2=RS_149._col0(Inner),Output:["_col0","_col1"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=1 width=184) + Output:["_col0"] + Filter Operator [FIL_147] (rows=1 width=192) + predicate:(_col1 = 2L) + Group By Operator [GBY_146] (rows=5633 width=192) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_161] + PartitionCols:_col0 + Group By Operator [GBY_160] (rows=5633 width=192) + 
Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 + Group By Operator [GBY_159] (rows=10141 width=192) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + PartitionCols:_col0 + Group By Operator [GBY_157] (rows=141974 width=192) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_156] (rows=20000000 width=89) + Output:["_col0"] + Filter Operator [FIL_155] (rows=20000000 width=89) + predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', 
'13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', 
'99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) + TableScan [TS_16] (rows=40000000 width=89) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] + <-Reducer 17 [CONTAINS] vectorized + Reduce Output Operator [RS_175] + PartitionCols:_col0 + Group By Operator [GBY_174] (rows=5633 width=192) + Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 + Group By Operator [GBY_173] (rows=1126 width=192) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_172] + PartitionCols:_col0 + Group By Operator [GBY_171] (rows=1126 width=192) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_170] (rows=2253 width=97) + Output:["_col0"] + Filter Operator [FIL_169] (rows=2253 width=97) + predicate:(_col1 > 10L) + Group By Operator [GBY_168] (rows=6761 width=97) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=67610 width=97) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 + Merge Join Operator [MERGEJOIN_122] (rows=26666667 width=89) + Conds:RS_164._col0=RS_167._col0(Inner),Output:["_col1"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Select Operator [SEL_163] (rows=40000000 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_162] (rows=40000000 width=93) + predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null + TableScan [TS_24] (rows=40000000 width=93) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_167] + PartitionCols:_col0 + Select Operator [SEL_166] (rows=26666667 
width=4) + Output:["_col0"] + Filter Operator [FIL_165] (rows=26666667 width=89) + predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) + TableScan [TS_27] (rows=80000000 width=89) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col2 + Select Operator [SEL_15] (rows=37399754 width=313) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_121] (rows=37399754 width=313) + Conds:RS_12._col1=RS_145._col0(Inner),Output:["_col2","_col5","_col6"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + PartitionCols:_col0 + Select Operator [SEL_144] (rows=1704 width=276) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_143] (rows=1704 width=181) + predicate:substr(s_zip, 1, 2) is not null + TableScan [TS_6] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_120] (rows=37399754 width=42) + Conds:RS_142._col0=RS_134._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_134] + PartitionCols:_col0 + Select Operator [SEL_133] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_132] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] + PartitionCols:_col0 + Select Operator [SEL_141] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_140] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_135] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_133] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out new file mode 100644 index 0000000000..d77567025e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out @@ -0,0 +1,615 @@ +PREHOOK: query: explain +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + 
sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_page +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: 
Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where 
ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_page +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 20 <- Reducer 25 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 35 <- Reducer 14 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE) +Map 39 <- Reducer 19 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 11 <- Map 38 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 31 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 14 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 16 <- Map 42 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 31 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 19 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 22 <- Map 24 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 24 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 27 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 24 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 29 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 30 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 4 <- Map 31 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_456] + Limit [LIM_455] 
(rows=100 width=619) + Number of rows:100 + Select Operator [SEL_454] (rows=19423 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_453] + Select Operator [SEL_452] (rows=19423 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_451] (rows=19423 width=627) + Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 13 [CONTAINS] vectorized + Reduce Output Operator [RS_474] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_473] (rows=29791 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_472] (rows=19861 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_471] (rows=19423 width=619) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_470] (rows=19423 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col0 + Group By Operator [GBY_72] (rows=46000 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col3)","sum(_col4)"],keys:_col2 + Merge Join Operator [MERGEJOIN_367] (rows=8592843 width=417) + Conds:RS_68._col0=RS_427._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_427] + PartitionCols:_col0 + Select Operator [SEL_424] (rows=1150 width=4) + Output:["_col0"] + Filter Operator [FIL_423] (rows=1150 width=89) + predicate:(p_channel_tv = 'N') + TableScan [TS_27] (rows=2300 width=89) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE 
[RS_68] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=17185686 width=430) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_366] (rows=17185686 width=430) + Conds:RS_61._col1=RS_469._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col8"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_469] + PartitionCols:_col0 + Select Operator [SEL_468] (rows=46000 width=104) + Output:["_col0","_col1"] + TableScan [TS_56] (rows=46000 width=104) + default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_365] (rows=17185686 width=334) + Conds:RS_395._col0=RS_59._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_395] + PartitionCols:_col0 + Select Operator [SEL_392] (rows=154000 width=4) + Output:["_col0"] + Filter Operator [FIL_391] (rows=154000 width=115) + predicate:(i_current_price > 50) + TableScan [TS_0] (rows=462000 width=115) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Select Operator [SEL_55] (rows=51557056 width=344) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_364] (rows=51557056 width=232) + Conds:RS_52._col0=RS_409._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_409] + PartitionCols:_col0 + Select Operator [SEL_406] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_405] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' + TableScan [TS_8] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + 
<-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_363] (rows=464045263 width=326) + Conds:RS_465._col2, _col4=RS_467._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_465] + PartitionCols:_col2, _col4 + Select Operator [SEL_464] (rows=283691906 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_463] (rows=283691906 width=243) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_58_item_i_item_sk_min) AND DynamicValue(RS_58_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_58_item_i_item_sk_bloom_filter))) and (cs_promo_sk BETWEEN DynamicValue(RS_69_promotion_p_promo_sk_min) AND DynamicValue(RS_69_promotion_p_promo_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_69_promotion_p_promo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_41] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_460] + Group By Operator [GBY_459] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_403] + Group By Operator [GBY_400] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator 
[SEL_396] (rows=154000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_392] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_458] + Group By Operator [GBY_457] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_417] + Group By Operator [GBY_414] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_410] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_406] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_462] + Group By Operator [GBY_461] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_435] + Group By Operator [GBY_432] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_428] (rows=1150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_424] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_467] + PartitionCols:_col0, _col1 + Select Operator [SEL_466] (rows=28798881 width=227) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_44] (rows=28798881 width=227) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] + <-Reducer 18 [CONTAINS] vectorized + Reduce Output Operator [RS_492] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_491] (rows=29791 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, 
_col1, 0L + Top N Key Operator [TNK_490] (rows=19861 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_489] (rows=27 width=615) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_488] (rows=27 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_112] + PartitionCols:_col0 + Group By Operator [GBY_111] (rows=243 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col4)","sum(_col5)"],keys:_col3 + Merge Join Operator [MERGEJOIN_372] (rows=4714659 width=435) + Conds:RS_429._col0=RS_108._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_429] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_424] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=9429318 width=439) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_371] (rows=9429318 width=439) + Conds:RS_103._col2=RS_487._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col8"] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_487] + PartitionCols:_col0 + Select Operator [SEL_486] (rows=84 width=104) + Output:["_col0","_col1"] + TableScan [TS_98] (rows=84 width=104) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_103] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_370] (rows=9429318 width=343) + Conds:RS_397._col0=RS_101._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_397] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_392] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_97] (rows=28287952 
width=347) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_369] (rows=28287952 width=235) + Conds:RS_94._col0=RS_411._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_411] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_406] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_368] (rows=254608997 width=363) + Conds:RS_483._col1, _col4=RS_485._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_483] + PartitionCols:_col1, _col4 + Select Operator [SEL_482] (rows=143894769 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_481] (rows=143894769 width=243) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_100_item_i_item_sk_min) AND DynamicValue(RS_100_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_100_item_i_item_sk_bloom_filter))) and (ws_promo_sk BETWEEN DynamicValue(RS_107_promotion_p_promo_sk_min) AND DynamicValue(RS_107_promotion_p_promo_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_107_promotion_p_promo_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) and ws_promo_sk is not null and ws_sold_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_83] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_478] + Group By Operator [GBY_477] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_404] + Group By Operator [GBY_401] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_398] (rows=154000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_392] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_476] + Group By Operator [GBY_475] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_418] + Group By Operator [GBY_415] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_412] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_406] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_480] + Group By Operator [GBY_479] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_436] + Group By Operator [GBY_433] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_430] (rows=1150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_424] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_485] + PartitionCols:_col0, _col1 + Select Operator [SEL_484] (rows=14398467 width=221) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_86] (rows=14398467 width=221) + 
default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_450] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_449] (rows=29791 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Top N Key Operator [TNK_448] (rows=19861 width=618) + keys:_col0, _col1, 0L,sort order:+++,top n:100 + Select Operator [SEL_447] (rows=411 width=617) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_446] (rows=411 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=8220 width=436) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col3)","sum(_col4)"],keys:_col2 + Merge Join Operator [MERGEJOIN_362] (rows=15038783 width=324) + Conds:RS_30._col0=RS_425._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_425] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_424] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=30077566 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_361] (rows=30077566 width=352) + Conds:RS_23._col2=RS_445._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col8"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_445] + PartitionCols:_col0 + Select Operator [SEL_444] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_18] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col2 + Merge Join 
Operator [MERGEJOIN_360] (rows=30077566 width=253) + Conds:RS_393._col0=RS_21._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_393] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_392] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=90232695 width=317) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_359] (rows=90232695 width=177) + Conds:RS_14._col0=RS_407._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_407] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_406] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_358] (rows=812149846 width=374) + Conds:RS_441._col1, _col4=RS_443._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_441] + PartitionCols:_col1, _col4 + Select Operator [SEL_440] (rows=501693263 width=233) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_439] (rows=501693263 width=233) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_31_promotion_p_promo_sk_min) AND DynamicValue(RS_31_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_31_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_promo_sk is not null and ss_sold_date_sk is not null and 
ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_420] + Group By Operator [GBY_419] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_416] + Group By Operator [GBY_413] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_408] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_406] + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_438] + Group By Operator [GBY_437] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_434] + Group By Operator [GBY_431] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_426] (rows=1150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_424] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_422] + Group By Operator [GBY_421] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_402] + Group By Operator [GBY_399] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select 
Operator [SEL_394] (rows=154000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_392] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] + PartitionCols:_col0, _col1 + Select Operator [SEL_442] (rows=57591150 width=224) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_6] (rows=57591150 width=224) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out new file mode 100644 index 0000000000..bcfe19e196 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Reducer 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 15 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_210] + Select Operator [SEL_209] (rows=100 width=1506) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Limit [LIM_208] (rows=100 width=1420) + Number of rows:100 + Select Operator [SEL_207] (rows=1577696 width=1418) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_63] + Select Operator [SEL_62] (rows=1577696 width=1418) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_178] (rows=1577696 width=1418) + Conds:RS_59._col0=RS_60._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=1609248 width=227) + Output:["_col0","_col2"] + Filter Operator [FIL_54] (rows=1609248 width=227) + predicate:(_col2 > _col3) + Merge Join Operator [MERGEJOIN_177] (rows=4827746 width=227) + Conds:RS_201._col1=RS_206._col1(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 13 [SIMPLE_EDGE] vectorized 
+ SHUFFLE [RS_206] + PartitionCols:_col1 + Select Operator [SEL_205] (rows=12 width=198) + Output:["_col0","_col1"] + Group By Operator [GBY_204] (rows=12 width=206) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 + Select Operator [SEL_203] (rows=5266632 width=201) + Output:["_col0","_col2"] + Group By Operator [GBY_202] (rows=5266632 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_42] (rows=8749496 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 + Merge Join Operator [MERGEJOIN_176] (rows=8749496 width=194) + Conds:RS_38._col2=RS_198._col0(Inner),Output:["_col1","_col3","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0 + Select Operator [SEL_196] (rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_195] (rows=40000000 width=90) + predicate:ca_state is not null + TableScan [TS_12] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_175] (rows=8749496 width=112) + Conds:RS_190._col0=RS_194._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_192] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_191] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_188] (rows=28221532 width=121) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_186] (rows=28221532 width=121) + 
predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) + TableScan [TS_6] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] + PartitionCols:_col1 + Select Operator [SEL_200] (rows=4827746 width=201) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_199] (rows=4827746 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=8574602 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 + Merge Join Operator [MERGEJOIN_174] (rows=8574602 width=194) + Conds:RS_18._col2=RS_197._col0(Inner),Output:["_col1","_col3","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_196] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_173] (rows=8574602 width=112) + Conds:RS_189._col0=RS_193._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_192] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Select Operator [SEL_187] (rows=27657410 width=121) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_185] (rows=27657410 width=121) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_172] (rows=1568628 width=1310) + 
Conds:RS_181._col2=RS_184._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_181] + PartitionCols:_col2 + Select Operator [SEL_180] (rows=80000000 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_179] (rows=80000000 width=375) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=375) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_184] + PartitionCols:_col0 + Select Operator [SEL_183] (rows=784314 width=941) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_182] (rows=784314 width=1027) + predicate:(ca_state = 'IL') + TableScan [TS_3] (rows=40000000 width=1027) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out new file mode 100644 index 0000000000..5a506f8321 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + 
and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 30 and 30+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-05-30' as date) and (cast('2002-05-30' as date) + 60 days) + and i_manufact_id in (437,129,727,663) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_96] + Limit [LIM_95] (rows=100 width=396) + Number of rows:100 + Select Operator [SEL_94] (rows=2871 width=396) + Output:["_col0","_col1","_col2"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] + Group By Operator [GBY_92] (rows=2871 width=396) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_22] (rows=2871 width=396) + Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 + Top N Key Operator [TNK_43] (rows=3564040 width=396) + keys:_col2, _col3, _col4,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_77] (rows=3564040 width=396) + Conds:RS_91._col0=RS_19._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 7 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=2871 width=400) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_76] (rows=2871 width=400) + Conds:RS_14._col1=RS_86._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_86] + PartitionCols:_col0 + Select Operator [SEL_85] (rows=297 width=400) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_84] (rows=297 width=404) + predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60) + TableScan [TS_8] (rows=462000 width=403) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_75] (rows=463969 width=4) + Conds:RS_80._col0=RS_83._col0(Inner),Output:["_col1"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_80] + PartitionCols:_col0 + Select Operator [SEL_79] (rows=4176000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_78] (rows=4176000 width=11) + predicate:inv_quantity_on_hand BETWEEN 100 AND 500 + TableScan [TS_2] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_81] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' + TableScan [TS_5] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=575995635 width=4) + Output:["_col0"] + Filter Operator [FIL_89] (rows=575995635 width=4) + predicate:(in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter)) and ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max)) + TableScan [TS_0] (rows=575995635 width=4) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_46] + Group By 
Operator [GBY_45] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_44] (rows=2871 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_17] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out new file mode 100644 index 0000000000..4c9fc683d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out @@ -0,0 +1,338 @@ +PREHOOK: query: explain +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + 
,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 
wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 22 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (ONE_TO_ONE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 21 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_397] + Limit [LIM_396] (rows=100 width=260) + Number of rows:100 + Select Operator [SEL_395] (rows=130021 width=260) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=130021 width=260) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_360] (rows=130021 width=148) + 
Conds:RS_120._col0=RS_394._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8"] + <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_394] + PartitionCols:_col0 + Select Operator [SEL_393] (rows=130021 width=116) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_392] (rows=130021 width=108) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_114] + PartitionCols:_col0 + Group By Operator [GBY_113] (rows=390063 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_358] (rows=5752600 width=103) + Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_349] (rows=5070 width=4) + Conds:RS_370._col1=RS_379._col0(Inner),Output:["_col0"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_370] + PartitionCols:_col1 + Select Operator [SEL_369] (rows=73049 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_368] (rows=73049 width=98) + predicate:d_date is not null + TableScan [TS_5] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_379] + PartitionCols:_col0 + Group By Operator [GBY_378] (rows=5070 width=94) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=5070 width=94) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_348] (rows=10141 width=94) + Conds:RS_373._col1=RS_377._col0(Left Semi),Output:["_col0"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_373] + PartitionCols:_col1 + Select Operator [SEL_372] (rows=73049 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_371] (rows=73049 width=98) + predicate:(d_date is not null and d_week_seq is not null) + TableScan [TS_8] 
(rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_377] + PartitionCols:_col0 + Group By Operator [GBY_376] (rows=1826 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_375] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_374] (rows=3652 width=98) + predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) + TableScan [TS_11] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_353] (rows=13749816 width=107) + Conds:RS_391._col1=RS_367._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_367] + PartitionCols:_col0 + Select Operator [SEL_364] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_391] + PartitionCols:_col1 + Select Operator [SEL_390] (rows=13749816 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_389] (rows=13749816 width=11) + predicate:wr_returned_date_sk is not null + TableScan [TS_78] (rows=14398467 width=11) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_120] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_359] (rows=134905 width=132) + Conds:RS_382._col0=RS_388._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_388] + PartitionCols:_col0 + Select Operator [SEL_387] (rows=141711 width=116) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_386] (rows=141711 width=108) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col0 + Group By Operator [GBY_74] (rows=462000 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_357] (rows=25343167 width=103) + Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_349] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_350] (rows=55578005 width=107) + Conds:RS_385._col1=RS_366._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_366] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_364] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_385] + PartitionCols:_col1 + Select Operator [SEL_384] (rows=55578005 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_383] (rows=55578005 width=11) + predicate:sr_returned_date_sk is not null + TableScan [TS_39] (rows=57591150 width=11) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_382] + PartitionCols:_col0 + Select Operator [SEL_381] (rows=134905 width=116) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_380] (rows=134905 width=108) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Group By Operator [GBY_35] (rows=462000 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_356] (rows=12501392 width=103) + Conds:RS_31._col0=RS_32._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Please 
refer to the previous Merge Join Operator [MERGEJOIN_349] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_347] (rows=28798881 width=107) + Conds:RS_363._col1=RS_365._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_365] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_364] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_363] + PartitionCols:_col1 + Select Operator [SEL_362] (rows=28798881 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_361] (rows=28798881 width=11) + predicate:cr_returned_date_sk is not null + TableScan [TS_0] (rows=28798881 width=11) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out new file mode 100644 index 0000000000..4aeb0285d3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: explain +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@store_returns +PREHOOK: Output: 
hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 6 <- Reducer 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_141] + Limit [LIM_140] (rows=100 width=384) + Number of rows:100 + Select Operator [SEL_139] (rows=255298 width=384) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_35] + Select Operator [SEL_34] (rows=255298 width=384) + Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_121] (rows=255298 width=284) + Conds:RS_124._col0=RS_32._col1(Inner),Output:["_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=55577698 width=3) + Output:["_col0"] + Filter Operator [FIL_122] (rows=55577698 width=3) + 
predicate:sr_cdemo_sk is not null + TableScan [TS_0] (rows=57591150 width=3) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_cdemo_sk"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Select Operator [SEL_30] (rows=8433 width=288) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_120] (rows=8433 width=288) + Conds:RS_27._col0=RS_28._col1(Inner),Output:["_col3","_col5","_col6"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1 + Select Operator [SEL_23] (rows=84323 width=288) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_119] (rows=84323 width=288) + Conds:RS_20._col1=RS_138._col0(Inner),Output:["_col0","_col2","_col4","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=1861800 width=4) + Output:["_col0"] + TableScan [TS_15] (rows=1861800 width=4) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_118] (rows=83148 width=284) + Conds:RS_133._col3=RS_136._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=44643 width=4) + Output:["_col0"] + Filter Operator [FIL_134] (rows=44643 width=97) + predicate:(ca_city = 'Hopewell') + TableScan [TS_12] (rows=40000000 width=97) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + PartitionCols:_col3 + Select Operator [SEL_132] (rows=74500295 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_131] (rows=74500295 width=291) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) + TableScan 
[TS_9] (rows=80000000 width=291) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_id","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_117] (rows=721 width=4) + Conds:RS_127._col1=RS_130._col0(Inner),Output:["_col0"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col1 + Select Operator [SEL_126] (rows=7200 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_125] (rows=7200 width=8) + predicate:hd_income_band_sk is not null + TableScan [TS_3] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] + PartitionCols:_col0 + Select Operator [SEL_129] (rows=2 width=4) + Output:["_col0"] + Filter Operator [FIL_128] (rows=2 width=12) + predicate:((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287)) + TableScan [TS_6] (rows=20 width=12) + default@income_band,income_band,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk","ib_lower_bound","ib_upper_bound"] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out new file mode 100644 index 0000000000..c461467495 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out @@ -0,0 +1,322 @@ +PREHOOK: query: explain +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and 
cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = 
wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 13 <- Reducer 6 (BROADCAST_EDGE) +Reducer 10 <- Map 7 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_211] + Limit [LIM_210] (rows=72 width=832) + Number of rows:100 + Select Operator [SEL_209] (rows=72 width=832) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + Select Operator [SEL_207] (rows=72 width=832) + Output:["_col4","_col5","_col6","_col7"] + Group By Operator [GBY_206] (rows=72 width=353) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Group By Operator [GBY_43] (rows=72 width=353) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col2)","count(_col2)","sum(_col5)","count(_col5)","sum(_col4)","count(_col4)"],keys:_col7 + Merge Join Operator [MERGEJOIN_181] (rows=16740 width=100) + Conds:RS_39._col3=RS_205._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=72 width=101) + Output:["_col0","_col1"] + TableScan [TS_34] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_39] + 
PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_180] (rows=16740 width=3) + Conds:RS_184._col0=RS_37._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_184] + PartitionCols:_col0 + Select Operator [SEL_183] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_182] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=46881 width=7) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_32] (rows=46881 width=31) + predicate:((_col16 and _col12) or (_col17 and _col13) or (_col18 and _col14)) + Merge Join Operator [MERGEJOIN_179] (rows=62510 width=31) + Conds:RS_29._col5=RS_203._col0(Inner),Output:["_col3","_col4","_col7","_col8","_col9","_col12","_col13","_col14","_col16","_col17","_col18"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_201] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) + TableScan [TS_23] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_178] (rows=708441 width=185) + Conds:RS_191._col0, _col1, _col2=RS_27._col3, _col7, _col8(Inner),Output:["_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_189] (rows=265971 width=183) + Output:["_col0","_col1","_col2"] + Filter Operator 
[FIL_188] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) + TableScan [TS_3] (rows=1861800 width=183) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col3, _col7, _col8 + Select Operator [SEL_22] (rows=708441 width=367) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Filter Operator [FIL_21] (rows=708441 width=403) + predicate:((_col21 and _col22 and _col15) or (_col23 and _col24 and _col16) or (_col25 and _col26 and _col17)) + Merge Join Operator [MERGEJOIN_177] (rows=1889180 width=403) + Conds:RS_18._col1=RS_192._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_192] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=265971 width=207) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Please refer to the previous Filter Operator [FIL_188] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=13039884 width=262) + Conds:RS_195._col0, _col5=RS_200._col1, _col2(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0, _col5 + Select Operator [SEL_194] (rows=11975292 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_193] (rows=11975292 width=237) + predicate:(wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and 
wr_returning_cdemo_sk is not null) + TableScan [TS_6] (rows=14398467 width=237) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] + PartitionCols:_col1, _col2 + Select Operator [SEL_199] (rows=15992347 width=39) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_198] (rows=15992347 width=243) + predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_9] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_197] + Group By Operator [GBY_196] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_187] + Group By Operator [GBY_186] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_185] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_183] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out new file mode 100644 index 0000000000..638cce1b46 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: explain +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_82] + Limit [LIM_81] (rows=100 width=490) + Number of rows:100 + Select Operator [SEL_80] (rows=3060 width=490) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + Select Operator [SEL_78] (rows=3060 width=490) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_77] (rows=3060 width=302) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] + Select Operator [SEL_76] (rows=3060 width=302) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_75] + PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END + Select Operator [SEL_74] (rows=3060 width=302) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_73] (rows=3060 width=302) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_16] (rows=88740 width=302) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L + Select Operator [SEL_14] (rows=24992810 width=293) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_59] (rows=24992810 width=293) + 
Conds:RS_11._col1=RS_72._col0(Inner),Output:["_col2","_col5","_col6"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] + PartitionCols:_col0 + Select Operator [SEL_71] (rows=462000 width=186) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_58] (rows=24992810 width=115) + Conds:RS_70._col0=RS_62._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_62] + PartitionCols:_col0 + Select Operator [SEL_61] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_60] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_70] + PartitionCols:_col0 + Select Operator [SEL_69] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_68] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_9_d1_d_date_sk_min) AND DynamicValue(RS_9_d1_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_d1_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_67] + Group By Operator [GBY_66] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_65] + Group By Operator [GBY_64] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_63] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_61] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out new file mode 100644 index 0000000000..b6d3904c1c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out @@ -0,0 +1,288 @@ +PREHOOK: query: explain +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, 
d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE) +Map 21 <- Reducer 15 (BROADCAST_EDGE) +Map 22 <- Reducer 19 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 17 <- Map 20 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_268] + Group By Operator [GBY_267] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_266] 
+ Group By Operator [GBY_265] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_264] (rows=8062883 width=16) + Filter Operator [FIL_263] (rows=8062883 width=16) + predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) + Select Operator [SEL_262] (rows=48377300 width=16) + Output:["_col3","_col4"] + Group By Operator [GBY_261] (rows=48377300 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 18 [CONTAINS] vectorized + Reduce Output Operator [RS_292] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_291] (rows=48377300 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 + Select Operator [SEL_290] (rows=48377300 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Select Operator [SEL_289] (rows=24986582 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_288] (rows=24986582 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_287] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_286] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_76] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_189] (rows=24986582 width=274) + Conds:RS_72._col1=RS_245._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col0 + Select Operator [SEL_242] (rows=80000000 width=184) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=80000000 width=184) + 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_188] (rows=24986582 width=97) + Conds:RS_285._col0=RS_229._col0(Inner),Output:["_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + PartitionCols:_col0 + Select Operator [SEL_224] (rows=317 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_223] (rows=317 width=102) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_285] + PartitionCols:_col0 + Select Operator [SEL_284] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_283] (rows=143930993 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_61] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_282] + Group By Operator [GBY_281] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_230] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_224] + <-Reducer 6 [CONTAINS] vectorized + Reduce Output 
Operator [RS_260] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_259] (rows=48377300 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 + Select Operator [SEL_258] (rows=48377300 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Select Operator [SEL_257] (rows=23390718 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_256] (rows=23390718 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col0, _col1, _col2 + Select Operator [SEL_255] (rows=23390718 width=290) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_254] (rows=23390718 width=290) + predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) + Group By Operator [GBY_253] (rows=140344308 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] vectorized + Reduce Output Operator [RS_280] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_279] (rows=140344308 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 + Select Operator [SEL_278] (rows=140344308 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Select Operator [SEL_277] (rows=49146883 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_276] (rows=49146883 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_275] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_274] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_39] (rows=49146883 
width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_187] (rows=49146883 width=274) + Conds:RS_35._col1=RS_244._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_242] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_186] (rows=49146883 width=97) + Conds:RS_273._col0=RS_227._col0(Inner),Output:["_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_224] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_273] + PartitionCols:_col0 + Select Operator [SEL_272] (rows=285117831 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_271] (rows=285117831 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_24] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_270] + Group By Operator [GBY_269] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_228] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_224] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator 
[RS_252] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_251] (rows=140344308 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 + Select Operator [SEL_250] (rows=140344308 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Select Operator [SEL_249] (rows=91197425 width=290) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_248] (rows=91197425 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_247] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_246] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_185] (rows=91197425 width=274) + Conds:RS_11._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_242] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_184] (rows=91197425 width=96) + Conds:RS_241._col0=RS_225._col0(Inner),Output:["_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_224] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + PartitionCols:_col0 + Select Operator [SEL_240] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_239] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_226] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_224] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out new file mode 100644 index 0000000000..430b443fc5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out @@ -0,0 +1,962 @@ +Warning: Shuffle Join MERGEJOIN[607][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[608][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[609][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[610][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[611][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[612][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join 
MERGEJOIN[613][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + 
(select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = 
household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , 
time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + 
and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 
and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE) +Map 68 <- Reducer 19 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 61 (BROADCAST_EDGE) +Map 69 <- Reducer 24 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE) +Map 70 <- Reducer 29 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE) +Map 71 <- Reducer 34 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 64 (BROADCAST_EDGE) +Map 72 <- Reducer 39 (BROADCAST_EDGE), Reducer 56 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) +Map 73 <- Reducer 44 (BROADCAST_EDGE), Reducer 57 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE) +Map 74 <- Reducer 49 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) +Reducer 10 <- Reducer 38 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 43 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 48 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 68 (SIMPLE_EDGE) +Reducer 16 <- Map 50 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 59 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 20 <- Map 13 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE) +Reducer 21 <- Map 50 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 59 
(SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 13 (SIMPLE_EDGE), Map 70 (SIMPLE_EDGE) +Reducer 26 <- Map 50 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 59 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 50 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 13 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE) +Reducer 31 <- Map 50 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 59 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 13 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) +Reducer 36 <- Map 50 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Map 59 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 59 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 13 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) +Reducer 41 <- Map 50 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 42 <- Map 59 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) +Reducer 43 <- Reducer 42 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 13 (SIMPLE_EDGE), Map 74 (SIMPLE_EDGE) +Reducer 46 <- Map 50 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE) +Reducer 47 <- Map 59 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) +Reducer 48 <- Reducer 47 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 51 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 52 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 54 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 56 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 57 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 58 <- Map 50 (CUSTOM_SIMPLE_EDGE) 
+Reducer 6 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 60 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 61 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 62 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 63 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 65 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 66 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 67 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 12 + File Output Operator [FS_238] + Select Operator [SEL_237] (rows=1 width=64) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_613] (rows=1 width=64) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_234] + Merge Join Operator [MERGEJOIN_612] (rows=1 width=56) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_231] + Merge Join Operator [MERGEJOIN_611] (rows=1 width=48) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_795] + Group By Operator [GBY_794] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_159] + Group By Operator [GBY_158] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_600] (rows=1352994 width=8) + Conds:RS_154._col0=RS_712._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_712] + PartitionCols:_col0 + Select Operator [SEL_701] (rows=155 
width=4) + Output:["_col0"] + Filter Operator [FIL_700] (rows=155 width=92) + predicate:(s_store_name = 'ese') + TableScan [TS_16] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_154] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_599] (rows=1842898 width=0) + Conds:RS_147._col0=RS_676._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_676] + PartitionCols:_col0 + Select Operator [SEL_663] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_655] (rows=1515 width=12) + predicate:((t_hour = 10) and (t_minute < 30)) + TableScan [TS_6] (rows=86400 width=12) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_147] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_598] (rows=56928540 width=4) + Conds:RS_793._col1=RS_626._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_626] + PartitionCols:_col0 + Select Operator [SEL_615] (rows=817 width=4) + Output:["_col0"] + Filter Operator [FIL_614] (rows=817 width=12) + predicate:((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5)) + TableScan [TS_3] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Map 72 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_793] + PartitionCols:_col1 + Select Operator [SEL_792] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_791] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN 
DynamicValue(RS_145_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_145_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_145_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_148_time_dim_t_time_sk_min) AND DynamicValue(RS_148_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_148_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_155_store_s_store_sk_min) AND DynamicValue(RS_155_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_155_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_135] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 39 [BROADCAST_EDGE] vectorized + BROADCAST [RS_786] + Group By Operator [GBY_785] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_645] + Group By Operator [GBY_637] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_627] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 56 [BROADCAST_EDGE] vectorized + BROADCAST [RS_788] + Group By Operator [GBY_787] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_695] + Group By Operator [GBY_687] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_677] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_663] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_790] + Group By Operator [GBY_789] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_731] + Group By Operator [GBY_723] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_713] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_228] + Merge Join Operator [MERGEJOIN_610] (rows=1 width=40) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_784] + Group By Operator [GBY_783] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_597] (rows=1352994 width=8) + Conds:RS_127._col0=RS_710._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_710] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_127] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_596] (rows=1842898 width=0) + Conds:RS_120._col0=RS_674._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_674] + PartitionCols:_col0 + Select Operator [SEL_662] (rows=1515 width=4) + Output:["_col0"] + Filter Operator 
[FIL_654] (rows=1515 width=12) + predicate:((t_hour = 10) and (t_minute >= 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_595] (rows=56928540 width=4) + Conds:RS_782._col1=RS_624._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_624] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 71 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_782] + PartitionCols:_col1 + Select Operator [SEL_781] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_780] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_118_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_118_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_118_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_121_time_dim_t_time_sk_min) AND DynamicValue(RS_121_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_121_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_128_store_s_store_sk_min) AND DynamicValue(RS_128_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_128_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_108] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_775] + Group By Operator [GBY_774] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_644] + Group By Operator [GBY_636] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_625] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 55 [BROADCAST_EDGE] vectorized + BROADCAST [RS_777] + Group By Operator [GBY_776] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_694] + Group By Operator [GBY_686] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_675] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_662] + <-Reducer 64 [BROADCAST_EDGE] vectorized + BROADCAST [RS_779] + Group By Operator [GBY_778] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_730] + Group By Operator [GBY_722] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_711] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_225] + Merge Join Operator [MERGEJOIN_609] (rows=1 width=32) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_773] + Group By Operator [GBY_772] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=8) + 
Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_594] (rows=1352994 width=8) + Conds:RS_100._col0=RS_708._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_708] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col0 + Select Operator [SEL_96] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_593] (rows=1842898 width=0) + Conds:RS_93._col0=RS_672._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_672] + PartitionCols:_col0 + Select Operator [SEL_661] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_653] (rows=1515 width=12) + predicate:((t_hour = 11) and (t_minute < 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_592] (rows=56928540 width=4) + Conds:RS_771._col1=RS_622._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_622] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 70 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_771] + PartitionCols:_col1 + Select Operator [SEL_770] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_769] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_91_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_91_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_91_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_94_time_dim_t_time_sk_min) AND DynamicValue(RS_94_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_94_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_101_store_s_store_sk_min) AND DynamicValue(RS_101_store_s_store_sk_max) 
and in_bloom_filter(ss_store_sk, DynamicValue(RS_101_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_81] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_764] + Group By Operator [GBY_763] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_643] + Group By Operator [GBY_635] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_623] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 54 [BROADCAST_EDGE] vectorized + BROADCAST [RS_766] + Group By Operator [GBY_765] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_693] + Group By Operator [GBY_685] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_673] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_661] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_768] + Group By Operator [GBY_767] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_729] + Group By Operator [GBY_721] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_709] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_222] + Merge Join Operator [MERGEJOIN_608] (rows=1 width=24) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_762] + Group By Operator [GBY_761] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_78] + Group By Operator [GBY_77] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_591] (rows=1352994 width=8) + Conds:RS_73._col0=RS_706._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_706] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col0 + Select Operator [SEL_69] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_590] (rows=1842898 width=0) + Conds:RS_66._col0=RS_670._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_670] + PartitionCols:_col0 + Select Operator [SEL_660] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_652] (rows=1515 width=12) + predicate:((t_hour = 11) and (t_minute >= 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_589] (rows=56928540 width=4) + Conds:RS_760._col1=RS_620._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_620] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_760] + PartitionCols:_col1 + 
Select Operator [SEL_759] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_758] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_64_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_64_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_64_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_67_time_dim_t_time_sk_min) AND DynamicValue(RS_67_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_67_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_74_store_s_store_sk_min) AND DynamicValue(RS_74_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_74_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_54] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_753] + Group By Operator [GBY_752] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_642] + Group By Operator [GBY_634] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_621] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_755] + Group By Operator [GBY_754] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE 
[RS_692] + Group By Operator [GBY_684] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_671] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_660] + <-Reducer 62 [BROADCAST_EDGE] vectorized + BROADCAST [RS_757] + Group By Operator [GBY_756] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_728] + Group By Operator [GBY_720] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_707] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_219] + Merge Join Operator [MERGEJOIN_607] (rows=1 width=16) + Conds:(Inner),Output:["_col0","_col1"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_751] + Group By Operator [GBY_750] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_588] (rows=1352994 width=8) + Conds:RS_46._col0=RS_704._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_704] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_587] (rows=1842898 width=0) + Conds:RS_39._col0=RS_668._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_668] + PartitionCols:_col0 + Select Operator [SEL_659] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_651] (rows=1515 width=12) + predicate:((t_hour = 12) and (t_minute < 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_586] (rows=56928540 width=4) + Conds:RS_749._col1=RS_618._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_618] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 68 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_749] + PartitionCols:_col1 + Select Operator [SEL_748] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_747] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_37_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_37_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_37_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_40_time_dim_t_time_sk_min) AND DynamicValue(RS_40_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_40_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_47_store_s_store_sk_min) AND DynamicValue(RS_47_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_47_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_27] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_742] + Group By Operator [GBY_741] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + 
<-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_641] + Group By Operator [GBY_633] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_619] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_744] + Group By Operator [GBY_743] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_691] + Group By Operator [GBY_683] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_669] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_659] + <-Reducer 61 [BROADCAST_EDGE] vectorized + BROADCAST [RS_746] + Group By Operator [GBY_745] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_727] + Group By Operator [GBY_719] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_705] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_740] + Group By Operator [GBY_739] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_24] + Group By Operator [GBY_23] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_585] 
(rows=1352994 width=8) + Conds:RS_19._col0=RS_702._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_702] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_584] (rows=1842898 width=0) + Conds:RS_12._col0=RS_666._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_666] + PartitionCols:_col0 + Select Operator [SEL_658] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_650] (rows=1515 width=12) + predicate:((t_hour = 8) and (t_minute >= 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_583] (rows=56928540 width=4) + Conds:RS_738._col1=RS_616._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_616] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_738] + PartitionCols:_col1 + Select Operator [SEL_737] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_736] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_10_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_10_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_10_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_13_time_dim_t_time_sk_min) AND DynamicValue(RS_13_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_13_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_20_store_s_store_sk_min) AND DynamicValue(RS_20_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_20_store_s_store_sk_bloom_filter))) 
and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_649] + Group By Operator [GBY_648] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_640] + Group By Operator [GBY_632] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_617] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 51 [BROADCAST_EDGE] vectorized + BROADCAST [RS_699] + Group By Operator [GBY_698] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_690] + Group By Operator [GBY_682] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_667] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_658] + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_735] + Group By Operator [GBY_734] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_726] + Group By Operator [GBY_718] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] 
+ Select Operator [SEL_703] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_806] + Group By Operator [GBY_805] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 42 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_186] + Group By Operator [GBY_185] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_603] (rows=1352994 width=8) + Conds:RS_181._col0=RS_714._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_714] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_181] + PartitionCols:_col0 + Select Operator [SEL_177] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_602] (rows=1842898 width=0) + Conds:RS_174._col0=RS_678._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_678] + PartitionCols:_col0 + Select Operator [SEL_664] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_656] (rows=1515 width=12) + predicate:((t_hour = 9) and (t_minute >= 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_174] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_601] (rows=56928540 width=4) + Conds:RS_804._col1=RS_628._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_628] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 73 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_804] + PartitionCols:_col1 + Select Operator [SEL_803] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_802] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_172_household_demographics_hd_demo_sk_min) AND 
DynamicValue(RS_172_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_172_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_175_time_dim_t_time_sk_min) AND DynamicValue(RS_175_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_175_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_182_store_s_store_sk_min) AND DynamicValue(RS_182_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_182_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_162] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 44 [BROADCAST_EDGE] vectorized + BROADCAST [RS_797] + Group By Operator [GBY_796] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_646] + Group By Operator [GBY_638] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_629] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 57 [BROADCAST_EDGE] vectorized + BROADCAST [RS_799] + Group By Operator [GBY_798] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_696] + Group By Operator [GBY_688] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_679] (rows=1515 width=4) + 
Output:["_col0"] + Please refer to the previous Select Operator [SEL_664] + <-Reducer 66 [BROADCAST_EDGE] vectorized + BROADCAST [RS_801] + Group By Operator [GBY_800] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_732] + Group By Operator [GBY_724] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_715] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + <-Reducer 48 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_817] + Group By Operator [GBY_816] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 47 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_213] + Group By Operator [GBY_212] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_606] (rows=1352994 width=8) + Conds:RS_208._col0=RS_716._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_716] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_701] + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=1842898 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_605] (rows=1842898 width=0) + Conds:RS_201._col0=RS_680._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_680] + PartitionCols:_col0 + Select Operator [SEL_665] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_657] (rows=1515 width=12) + predicate:((t_hour = 9) and (t_minute < 30)) + Please refer to the previous TableScan [TS_6] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_201] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_604] (rows=56928540 width=4) + 
Conds:RS_815._col1=RS_630._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_630] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_615] + <-Map 74 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_815] + PartitionCols:_col1 + Select Operator [SEL_814] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_813] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_199_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_199_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_199_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_202_time_dim_t_time_sk_min) AND DynamicValue(RS_202_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_202_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_209_store_s_store_sk_min) AND DynamicValue(RS_209_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_209_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_189] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_808] + Group By Operator [GBY_807] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_647] + Group By Operator [GBY_639] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_631] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_615] + <-Reducer 58 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_810] + Group By Operator [GBY_809] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_697] + Group By Operator [GBY_689] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_681] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_665] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_812] + Group By Operator [GBY_811] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_733] + Group By Operator [GBY_725] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_717] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_701] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out new file mode 100644 index 0000000000..950f86bf1c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category 
in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_116] + Limit [LIM_115] (rows=100 width=801) + Number of rows:100 + Select Operator [SEL_114] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Select Operator [SEL_112] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_111] (rows=4804228 width=689) + predicate:CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END + Select Operator [SEL_110] (rows=9608456 width=577) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_109] (rows=9608456 width=577) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] + Select Operator [SEL_108] (rows=9608456 width=577) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col2, _col0, _col4, _col5 + Group By Operator [GBY_106] (rows=9608456 width=577) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 4 
[SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_22] (rows=27308180 width=577) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)"],keys:_col0, _col1, _col2, _col5, _col7, _col8 + Merge Join Operator [MERGEJOIN_84] (rows=27308180 width=480) + Conds:RS_18._col3=RS_105._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=1704 width=183) + Output:["_col0","_col1","_col2"] + TableScan [TS_16] (rows=1704 width=183) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col3 + Select Operator [SEL_15] (rows=27308180 width=301) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_83] (rows=27308180 width=301) + Conds:RS_12._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col9"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + PartitionCols:_col0 + Select Operator [SEL_94] (rows=652 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_93] (rows=652 width=12) + predicate:(d_year = 2000) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_82] (rows=76480702 width=364) + Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_87] + PartitionCols:_col0 + Select Operator [SEL_86] (rows=6988 width=286) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_85] (rows=6988 width=286) + predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN 
('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and (i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants')) + TableScan [TS_3] (rows=462000 width=286) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] + PartitionCols:_col1 + Select Operator [SEL_102] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_101] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_10_item_i_item_sk_min) AND DynamicValue(RS_10_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_10_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_100] + Group By Operator [GBY_99] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_96] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_94] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_92] + Group By 
Operator [GBY_91] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_88] (rows=6988 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_86] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query9.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query9.q.out new file mode 100644 index 0000000000..66eee9afca --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query9.q.out @@ -0,0 +1,460 @@ +Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 11' is a cross product 
+Warning: Shuffle Join MERGEJOIN[186][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[187][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[188][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[190][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where 
ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_sales 
+POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 34 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 25 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 20 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 16 + File 
Output Operator [FS_159] + Select Operator [SEL_158] (rows=2 width=560) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_190] (rows=2 width=676) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_155] + Select Operator [SEL_147] (rows=2 width=564) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_189] (rows=2 width=680) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_144] + Merge Join Operator [MERGEJOIN_188] (rows=2 width=568) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_141] + Merge Join Operator [MERGEJOIN_187] (rows=2 width=564) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_138] + Select Operator [SEL_116] (rows=2 width=452) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_186] (rows=2 width=568) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Merge Join Operator [MERGEJOIN_185] (rows=2 width=456) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_184] (rows=2 width=452) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_280] + Select Operator [SEL_279] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_278] (rows=1 width=120) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_257] + Group By Operator [GBY_252] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_247] (rows=182855757 width=110) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_242] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 41 AND 60 + TableScan [TS_24] (rows=575995635 width=110) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_net_paid_inc_tax"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_107] + Select Operator [SEL_85] (rows=2 width=340) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_183] (rows=2 width=456) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_277] + Select Operator [SEL_276] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_275] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_234] + Group By Operator [GBY_229] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_224] (rows=182855757 width=110) + Output:["ss_ext_list_price"] + Filter Operator [FIL_219] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 41 AND 60 + TableScan [TS_10] (rows=575995635 width=110) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_ext_list_price"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_82] + Merge Join Operator [MERGEJOIN_182] (rows=2 width=344) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] 
vectorized + PARTITION_ONLY_SHUFFLE [RS_274] + Select Operator [SEL_273] (rows=1 width=4) + Output:["_col0"] + Group By Operator [GBY_272] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_211] + Group By Operator [GBY_206] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_201] (rows=182855757 width=3) + Filter Operator [FIL_196] (rows=182855757 width=3) + predicate:ss_quantity BETWEEN 41 AND 60 + TableScan [TS_3] (rows=575995635 width=3) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_181] (rows=2 width=340) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_271] + Select Operator [SEL_270] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_269] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_256] + Group By Operator [GBY_251] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_246] (rows=182855757 width=110) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_241] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 21 AND 40 + Please refer to the previous TableScan [TS_24] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_76] + Select Operator [SEL_54] (rows=2 width=228) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_180] (rows=2 width=344) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_268] + Select Operator [SEL_267] (rows=1 width=112) + Output:["_col0"] 
+ Group By Operator [GBY_266] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_233] + Group By Operator [GBY_228] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_223] (rows=182855757 width=110) + Output:["ss_ext_list_price"] + Filter Operator [FIL_218] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 21 AND 40 + Please refer to the previous TableScan [TS_10] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Merge Join Operator [MERGEJOIN_179] (rows=2 width=232) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_265] + Select Operator [SEL_264] (rows=1 width=4) + Output:["_col0"] + Group By Operator [GBY_263] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_210] + Group By Operator [GBY_205] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_200] (rows=182855757 width=3) + Filter Operator [FIL_195] (rows=182855757 width=3) + predicate:ss_quantity BETWEEN 21 AND 40 + Please refer to the previous TableScan [TS_3] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_178] (rows=2 width=228) + Conds:(Left Outer),Output:["_col0","_col1","_col2"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_45] + Select Operator [SEL_23] (rows=2 width=116) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_177] (rows=2 width=116) + Conds:(Left Outer),Output:["_col1","_col2"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_20] + Merge Join Operator [MERGEJOIN_176] (rows=2 width=4) + Conds:(Left Outer),Output:["_col1"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] 
vectorized + PARTITION_ONLY_SHUFFLE [RS_193] + Select Operator [SEL_192] (rows=2 width=4) + Filter Operator [FIL_191] (rows=2 width=4) + predicate:(r_reason_sk = 1) + TableScan [TS_0] (rows=72 width=4) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_216] + Select Operator [SEL_215] (rows=1 width=4) + Output:["_col0"] + Group By Operator [GBY_214] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_209] + Group By Operator [GBY_204] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_199] (rows=182855757 width=3) + Filter Operator [FIL_194] (rows=182855757 width=3) + predicate:ss_quantity BETWEEN 1 AND 20 + Please refer to the previous TableScan [TS_3] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_239] + Select Operator [SEL_238] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_237] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_232] + Group By Operator [GBY_227] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_222] (rows=182855757 width=110) + Output:["ss_ext_list_price"] + Filter Operator [FIL_217] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 1 AND 20 + Please refer to the previous TableScan [TS_10] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_262] + Select Operator [SEL_261] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_260] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_255] + Group By Operator [GBY_250] 
(rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_245] (rows=182855757 width=110) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_240] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 1 AND 20 + Please refer to the previous TableScan [TS_24] + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_283] + Select Operator [SEL_282] (rows=1 width=4) + Output:["_col0"] + Group By Operator [GBY_281] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_212] + Group By Operator [GBY_207] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_202] (rows=182855757 width=3) + Filter Operator [FIL_197] (rows=182855757 width=3) + predicate:ss_quantity BETWEEN 61 AND 80 + Please refer to the previous TableScan [TS_3] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_286] + Select Operator [SEL_285] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_284] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_235] + Group By Operator [GBY_230] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_225] (rows=182855757 width=110) + Output:["ss_ext_list_price"] + Filter Operator [FIL_220] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 61 AND 80 + Please refer to the previous TableScan [TS_10] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_289] + Select Operator [SEL_288] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_287] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_258] + Group By Operator [GBY_253] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_248] (rows=182855757 width=110) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_243] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 61 AND 80 + Please refer to the previous TableScan [TS_24] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] + Select Operator [SEL_291] (rows=1 width=4) + Output:["_col0"] + Group By Operator [GBY_290] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_213] + Group By Operator [GBY_208] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_203] (rows=182855757 width=3) + Filter Operator [FIL_198] (rows=182855757 width=3) + predicate:ss_quantity BETWEEN 81 AND 100 + Please refer to the previous TableScan [TS_3] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + Select Operator [SEL_294] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_293] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] + Group By Operator [GBY_231] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_226] (rows=182855757 width=110) + Output:["ss_ext_list_price"] + Filter Operator [FIL_221] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 81 AND 100 + Please refer to the previous TableScan [TS_10] + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] + Select Operator [SEL_297] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_296] (rows=1 width=120) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_259] + Group By Operator [GBY_254] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_249] (rows=182855757 width=110) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_244] (rows=182855757 width=110) + predicate:ss_quantity BETWEEN 81 AND 100 + Please refer to the previous TableScan [TS_24] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query90.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query90.q.out new file mode 100644 index 0000000000..c1324fc100 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query90.q.out @@ -0,0 +1,267 @@ +Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: explain +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_page 
+PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 21 <- Reducer 14 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_213] + Limit [LIM_212] (rows=1 width=112) + Number of rows:100 + Select Operator [SEL_211] (rows=1 width=112) + Output:["_col0"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_58] + Select Operator [SEL_57] (rows=1 width=112) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_154] (rows=1 width=224) + Conds:(Inner),Output:["_col0","_col1"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_210] + Select Operator [SEL_209] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_208] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_153] (rows=153010 width=8) + Conds:RS_46._col0=RS_185._col0(Inner) + <-Map 
18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_182] (rows=511 width=4) + Output:["_col0"] + Filter Operator [FIL_181] (rows=511 width=7) + predicate:wp_char_count BETWEEN 5000 AND 5200 + TableScan [TS_16] (rows=4602 width=7) + default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk","wp_char_count"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=1377983 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_152] (rows=1377983 width=3) + Conds:RS_39._col1=RS_173._col0(Inner),Output:["_col2"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_173] + PartitionCols:_col0 + Select Operator [SEL_170] (rows=655 width=4) + Output:["_col0"] + Filter Operator [FIL_169] (rows=655 width=8) + predicate:(hd_dep_count = 8) + TableScan [TS_6] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_151] (rows=15147293 width=7) + Conds:RS_207._col0=RS_161._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=9095 width=4) + Output:["_col0"] + Filter Operator [FIL_156] (rows=9095 width=8) + predicate:t_hour BETWEEN 14 AND 15 + TableScan [TS_3] (rows=86400 width=8) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] + PartitionCols:_col0 + Select Operator [SEL_206] (rows=143895111 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_205] (rows=143895111 width=11) + predicate:((ws_ship_hdemo_sk BETWEEN DynamicValue(RS_40_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_40_household_demographics_hd_demo_sk_max) and in_bloom_filter(ws_ship_hdemo_sk, 
DynamicValue(RS_40_household_demographics_hd_demo_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_37_time_dim_t_time_sk_min) AND DynamicValue(RS_37_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_37_time_dim_t_time_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_47_web_page_wp_web_page_sk_min) AND DynamicValue(RS_47_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_47_web_page_wp_web_page_sk_bloom_filter))) and ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) + TableScan [TS_27] (rows=144002668 width=11) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_200] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_164] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_162] (rows=9095 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_158] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_202] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_178] + Group By Operator [GBY_176] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_174] (rows=655 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_170] + 
<-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + Group By Operator [GBY_188] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_186] (rows=511 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_182] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] + Select Operator [SEL_197] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_196] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_24] + Group By Operator [GBY_23] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_150] (rows=153010 width=8) + Conds:RS_19._col0=RS_183._col0(Inner) + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_182] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=1377983 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_149] (rows=1377983 width=3) + Conds:RS_12._col1=RS_171._col0(Inner),Output:["_col2"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_171] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_170] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_148] (rows=15147293 width=7) + Conds:RS_195._col0=RS_159._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_157] (rows=9095 width=4) + Output:["_col0"] + Filter 
Operator [FIL_155] (rows=9095 width=8) + predicate:t_hour BETWEEN 6 AND 7 + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0 + Select Operator [SEL_194] (rows=143895111 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_193] (rows=143895111 width=11) + predicate:((ws_ship_hdemo_sk BETWEEN DynamicValue(RS_13_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_13_household_demographics_hd_demo_sk_max) and in_bloom_filter(ws_ship_hdemo_sk, DynamicValue(RS_13_household_demographics_hd_demo_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_10_time_dim_t_time_sk_min) AND DynamicValue(RS_10_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_10_time_dim_t_time_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_20_web_page_wp_web_page_sk_min) AND DynamicValue(RS_20_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_20_web_page_wp_web_page_sk_bloom_filter))) and ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) + TableScan [TS_0] (rows=144002668 width=11) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_180] + Group By Operator [GBY_179] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_177] + Group By Operator [GBY_175] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_172] (rows=655 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_170] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_192] + Group By Operator 
[GBY_191] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + Group By Operator [GBY_187] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_184] (rows=511 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_182] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_165] + Group By Operator [GBY_163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_160] (rows=9095 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_157] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out new file mode 100644 index 0000000000..2cb787d4be --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year 
= 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_171] + Select Operator [SEL_170] (rows=1920 width=406) + Output:["_col0","_col1","_col2","_col3"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_169] + Select Operator [SEL_168] (rows=1920 width=518) + Output:["_col0","_col1","_col2","_col4"] + Group By Operator [GBY_167] (rows=1920 width=585) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_41] (rows=1920 width=585) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)"],keys:_col1, _col2, _col3, _col4, _col5 + Merge Join Operator [MERGEJOIN_146] (rows=65760 width=473) + Conds:RS_37._col8=RS_166._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + PartitionCols:_col0 + Select Operator [SEL_165] (rows=3600 width=4) + Output:["_col0"] + Filter Operator [FIL_164] (rows=3600 width=96) + predicate:(hd_buy_potential like '0-500%') + TableScan [TS_31] (rows=7200 width=96) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_145] (rows=131519 width=473) + 
Conds:RS_149._col0=RS_35._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=59105 width=183) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_147] (rows=59105 width=183) + predicate:((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree'))) + TableScan [TS_0] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col4 + Select Operator [SEL_30] (rows=131519 width=294) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_144] (rows=131519 width=294) + Conds:RS_27._col0=RS_28._col3(Inner),Output:["_col1","_col2","_col5","_col6","_col7","_col9"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col3 + Select Operator [SEL_23] (rows=657590 width=312) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_143] (rows=657590 width=312) + Conds:RS_20._col2=RS_163._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Select Operator [SEL_162] (rows=60 width=298) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_15] (rows=60 width=298) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_142] (rows=657590 width=19) + Conds:RS_158._col0=RS_161._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + 
PartitionCols:_col0 + Select Operator [SEL_157] (rows=27658583 width=121) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_156] (rows=27658583 width=121) + predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_9] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_159] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 1999)) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_141] (rows=14900060 width=10) + Conds:RS_152._col3=RS_155._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col3 + Select Operator [SEL_151] (rows=74500295 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_150] (rows=74500295 width=15) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) + TableScan [TS_3] (rows=80000000 width=15) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] + PartitionCols:_col0 + Select Operator [SEL_154] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_153] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -7) + TableScan [TS_6] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query92.q.out new file mode 100644 index 0000000000..da5a94e126 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query92.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: explain +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 15 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) +Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_147] + Limit [LIM_146] (rows=1 width=224) + Number of rows:100 + Select Operator [SEL_145] (rows=1 width=224) + Output:["_col0"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + Select Operator [SEL_143] (rows=1 width=224) + Output:["_col1"] + Group By Operator [GBY_142] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_36] + Group By Operator [GBY_35] (rows=1 width=112) + Output:["_col0"],aggregations:["sum(_col2)"] + Select Operator [SEL_34] (rows=2478 width=112) + Output:["_col2"] + Filter Operator [FIL_33] (rows=2478 width=112) + predicate:(_col2 > _col5) + Merge Join Operator [MERGEJOIN_107] (rows=7434 width=112) + Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_104] (rows=15995224 width=115) + Conds:RS_131._col0=RS_110._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=8116 width=4) + Output:["_col0"] + Filter Operator 
[FIL_108] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + PartitionCols:_col0 + Select Operator [SEL_130] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_129] (rows=143966864 width=119) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_123] (rows=669 width=4) + Output:["_col0"] + Select Operator [SEL_121] (rows=669 width=4) + Output:["_col0"] + Filter Operator [FIL_120] (rows=669 width=7) + predicate:(i_manufact_id = 269) + TableScan [TS_20] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_119] + 
Group By Operator [GBY_118] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_111] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 11 [ONE_TO_ONE_EDGE] + FORWARD [RS_31] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_106] (rows=97 width=116) + Conds:RS_141._col0=RS_122._col0(Inner),Output:["_col1","_col2"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_121] + <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=6951 width=116) + Output:["_col0","_col1"] + Group By Operator [GBY_139] (rows=6951 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=55608 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Merge Join Operator [MERGEJOIN_105] (rows=15995224 width=115) + Conds:RS_138._col0=RS_112._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_109] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_136] (rows=143966864 width=119) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND 
DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_30_web_sales_ws_item_sk_min) AND DynamicValue(RS_30_web_sales_ws_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_web_sales_ws_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_6] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_128] + Please refer to the previous Group By Operator [GBY_126] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_113] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_92] (rows=15995224 
width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_104] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out new file mode 100644 index 0000000000..caefc88c5a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 9 <- Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_81] + Limit [LIM_80] (rows=100 width=112) + Number of rows:100 + Select Operator [SEL_79] (rows=38308 width=112) + Output:["_col0","_col1"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + Group By Operator [GBY_77] (rows=38308 width=112) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=38308 width=112) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col0 + Select Operator [SEL_14] (rows=15586502 width=3) + Output:["_col0","_col2"] + Merge Join Operator [MERGEJOIN_63] (rows=15586502 width=3) + Conds:RS_11._col0, _col2=RS_76._col0, _col2(Inner),Output:["_col3","_col6","_col8","_col9"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_11] + PartitionCols:_col0, _col2 + Merge Join Operator [MERGEJOIN_62] (rows=1522298 width=8) + Conds:RS_66._col1=RS_69._col0(Inner),Output:["_col0","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_66] + PartitionCols:_col1 + Select Operator [SEL_65] (rows=55574563 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_64] (rows=55574563 width=15) + predicate:sr_reason_sk is not null + TableScan [TS_0] (rows=57591150 width=15) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_69] + PartitionCols:_col0 + Select Operator [SEL_68] (rows=1 
width=4) + Output:["_col0"] + Filter Operator [FIL_67] (rows=1 width=101) + predicate:(r_reason_desc = 'Did not like the warranty') + TableScan [TS_3] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] + PartitionCols:_col0, _col2 + Select Operator [SEL_75] (rows=575995635 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_74] (rows=575995635 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_11_store_returns_sr_item_sk_min) AND DynamicValue(RS_11_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_11_store_returns_sr_item_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_11_store_returns_sr_ticket_number_min) AND DynamicValue(RS_11_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_11_store_returns_sr_ticket_number_bloom_filter)))) + TableScan [TS_6] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_54] + Group By Operator [GBY_53] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_52] (rows=1522298 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_62] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_73] + Group By Operator [GBY_72] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_59] + Group By Operator [GBY_58] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_57] (rows=1522298 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_62] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out new file mode 100644 index 0000000000..8c813d84ee --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out @@ -0,0 +1,240 @@ +PREHOOK: query: explain +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + 
,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_169] + Limit [LIM_168] (rows=1 width=240) + Number of rows:100 + Select Operator [SEL_167] (rows=1 width=240) + Output:["_col0","_col1","_col2"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + 
Select Operator [SEL_165] (rows=1 width=240) + Output:["_col1","_col2","_col3"] + Group By Operator [GBY_164] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_161] (rows=2511437 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=2511437 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 + Select Operator [SEL_37] (rows=5022875 width=230) + Output:["_col4","_col5","_col6"] + Filter Operator [FIL_36] (rows=5022875 width=230) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_125] (rows=14054072 width=230) + Conds:RS_33._col4=RS_160._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_160] + PartitionCols:_col0 + Select Operator [SEL_159] (rows=14398467 width=8) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=14398467 width=4) + default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_33] + PartitionCols:_col4 + Select Operator [SEL_32] (rows=5022875 width=231) + Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_124] (rows=5022875 width=235) + Conds:RS_29._col4=RS_158._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_123] (rows=5022875 width=231) + Conds:RS_18._col2=RS_144._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + 
<-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=12 width=91) + Output:["_col0"] + Filter Operator [FIL_142] (rows=12 width=92) + predicate:(web_company_name = 'pri') + TableScan [TS_9] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_122] (rows=15673790 width=235) + Conds:RS_15._col1=RS_136._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_134] (rows=784314 width=90) + predicate:(ca_state = 'TX') + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=15987241 width=239) + Conds:RS_152._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_126] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=143895019 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_150] (rows=143895019 width=243) + predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND 
DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_19_web_site_web_site_sk_min) AND DynamicValue(RS_19_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_19_web_site_web_site_sk_bloom_filter))) and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_129] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_127] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_137] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_135] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_145] (rows=12 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_143] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + PartitionCols:_col0 + Group By Operator [GBY_157] (rows=143966743 width=7) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_156] (rows=143966743 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=143966743 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_29_ws1_ws_order_number_min) AND DynamicValue(RS_29_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_29_ws1_ws_order_number_bloom_filter))) and ws_warehouse_sk is not null) + TableScan [TS_22] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=5022875 width=8) + 
Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_123] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out new file mode 100644 index 0000000000..c286c74e4a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -0,0 +1,271 @@ +PREHOOK: query: explain +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as 
`total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Map 20 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_265] + Limit [LIM_264] (rows=1 width=240) + Number of rows:100 + Select Operator [SEL_263] (rows=1 
width=240) + Output:["_col0","_col1","_col2"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_262] + Select Operator [SEL_261] (rows=1 width=240) + Output:["_col1","_col2","_col3"] + Group By Operator [GBY_260] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_259] + Group By Operator [GBY_258] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_257] (rows=143895019 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Group By Operator [GBY_100] (rows=143895019 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 + Merge Join Operator [MERGEJOIN_219] (rows=83469759007 width=227) + Conds:RS_47._col3=RS_48._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 19 [ONE_TO_ONE_EDGE] + FORWARD [RS_48] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=1384229738 width=4) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_213] (rows=1384229738 width=4) + Conds:RS_31._col0=RS_256.wr_order_number(Inner),Output:["_col14"] + <-Reducer 18 [ONE_TO_ONE_EDGE] + FORWARD [RS_31] + PartitionCols:_col0 + Select Operator [SEL_29] (rows=1411940834 width=4) + Output:["_col0"] + Filter Operator [FIL_28] (rows=1411940834 width=11) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_212] (rows=1411940834 width=11) + Conds:RS_252._col1=RS_255._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_252] + PartitionCols:_col1 + Select Operator [SEL_251] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_250] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, 
DynamicValue(RS_44_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_44_ws1_ws_order_number_min) AND DynamicValue(RS_44_ws1_ws_order_number_max)) + TableScan [TS_21] (rows=144002668 width=7) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Group By Operator [GBY_247] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_161] (rows=5022875 width=8) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_216] (rows=5022875 width=227) + Conds:RS_41._col2=RS_238._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_237] (rows=12 width=4) + Output:["_col0"] + Filter Operator [FIL_236] (rows=12 width=92) + predicate:(web_company_name = 'pri') + TableScan [TS_9] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_215] (rows=15673790 width=231) + Conds:RS_38._col1=RS_230._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + PartitionCols:_col0 + Select Operator [SEL_229] (rows=784314 width=4) + Output:["_col0"] + Filter Operator [FIL_228] (rows=784314 width=90) + predicate:(ca_state = 'TX') + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] + 
PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_214] (rows=15987241 width=235) + Conds:RS_246._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] + PartitionCols:_col0 + Select Operator [SEL_221] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_220] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Select Operator [SEL_245] (rows=143895019 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_244] (rows=143895019 width=239) + predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_39_customer_address_ca_address_sk_min) AND DynamicValue(RS_39_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_39_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_42_web_site_web_site_sk_min) AND DynamicValue(RS_42_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_42_web_site_web_site_sk_bloom_filter))) and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=239) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_223] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_221] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_231] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_229] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_239] (rows=12 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_237] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + PartitionCols:_col1 + Select Operator [SEL_254] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_253] (rows=144002668 width=7) + predicate:(in_bloom_filter(ws_order_number, 
DynamicValue(RS_44_ws1_ws_order_number_bloom_filter)) and ws_order_number BETWEEN DynamicValue(RS_44_ws1_ws_order_number_min) AND DynamicValue(RS_44_ws1_ws_order_number_max)) + TableScan [TS_23] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_249] + Please refer to the previous Group By Operator [GBY_247] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] + PartitionCols:wr_order_number + TableScan [TS_30] (rows=14398467 width=4) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_47] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_218] (rows=482885639 width=227) + Conds:RS_44._col3=RS_45._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 18 [ONE_TO_ONE_EDGE] + FORWARD [RS_45] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_29] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col3 + Please refer to the previous Merge Join Operator [MERGEJOIN_216] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query96.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query96.q.out new file mode 100644 index 0000000000..c73f380118 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query96.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: explain +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: 
default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_110] + Limit [LIM_109] (rows=1 width=16) + Number of rows:100 + Select Operator [SEL_108] (rows=1 width=16) + Output:["_col0"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + Select Operator [SEL_106] (rows=1 width=16) + Output:["_col1"] + Group By Operator [GBY_105] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_24] + Group By Operator [GBY_23] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_77] (rows=1084713 width=8) + Conds:RS_19._col0=RS_96._col0(Inner) + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_96] + 
PartitionCols:_col0 + Select Operator [SEL_95] (rows=155 width=4) + Output:["_col0"] + Filter Operator [FIL_94] (rows=155 width=92) + predicate:(s_store_name = 'ese') + TableScan [TS_16] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_15] (rows=1477476 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_76] (rows=1477476 width=0) + Conds:RS_12._col1=RS_88._col0(Inner),Output:["_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_88] + PartitionCols:_col0 + Select Operator [SEL_87] (rows=655 width=4) + Output:["_col0"] + Filter Operator [FIL_86] (rows=655 width=8) + predicate:(hd_dep_count = 5) + TableScan [TS_6] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_75] (rows=16240953 width=0) + Conds:RS_104._col0=RS_80._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_80] + PartitionCols:_col0 + Select Operator [SEL_79] (rows=1515 width=4) + Output:["_col0"] + Filter Operator [FIL_78] (rows=1515 width=12) + predicate:((t_hour = 8) and (t_minute >= 30)) + TableScan [TS_3] (rows=86400 width=12) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_104] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_102] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_13_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_13_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_13_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk 
BETWEEN DynamicValue(RS_10_time_dim_t_time_sk_min) AND DynamicValue(RS_10_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_10_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_20_store_s_store_sk_min) AND DynamicValue(RS_20_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_20_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_93] + Group By Operator [GBY_92] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_89] (rows=655 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_87] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_101] + Group By Operator [GBY_100] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_99] + Group By Operator [GBY_98] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_97] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_95] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_85] + Group By Operator [GBY_84] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_83] + Group By Operator [GBY_82] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_81] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_79] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query97.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query97.q.out new file mode 100644 index 0000000000..1e15c0decd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query97.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: explain +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with ssci as ( +select ss_customer_sk customer_sk + 
,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Map 11 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_96] + Limit [LIM_95] (rows=1 width=24) + Number of rows:100 + Group By Operator [GBY_94] (rows=1 width=24) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] + Group By Operator [GBY_33] (rows=1 width=24) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col0)","sum(_col1)","sum(_col2)"] + Select Operator [SEL_31] (rows=19216436912 width=7) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_69] (rows=19216436912 width=7) + Conds:RS_86._col0, _col1=RS_93._col0, _col1(Outer),Output:["_col0","_col2"] + <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_86] + PartitionCols:_col0, _col1 + Group By Operator [GBY_85] (rows=95493908 width=6) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=95493908 width=6) + Output:["_col0","_col1"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_67] (rows=95493908 width=6) + Conds:RS_84._col0=RS_72._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] + PartitionCols:_col0 + Select Operator [SEL_71] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_70] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] 
(rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=550076554 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_82] (rows=550076554 width=11) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_81] + Group By Operator [GBY_80] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + Group By Operator [GBY_76] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_73] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_71] + <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_93] + PartitionCols:_col0, _col1 + Group By Operator [GBY_92] (rows=49393705 width=7) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0, _col1 + Group By Operator [GBY_24] (rows=49393705 width=7) + Output:["_col0","_col1"],keys:_col1, _col2 + Merge Join Operator [MERGEJOIN_68] (rows=49393705 width=7) + Conds:RS_91._col0=RS_74._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_71] + <-Map 11 [SIMPLE_EDGE] vectorized 
+ SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=286549727 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_89] (rows=286549727 width=11) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_14] (rows=287989836 width=11) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + Group By Operator [GBY_77] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_75] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_71] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out new file mode 100644 index 0000000000..3ffc379cd9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out @@ -0,0 +1,165 @@ +PREHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 
30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_84] + Select Operator [SEL_83] (rows=138600 width=701) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_82] + Select Operator [SEL_81] (rows=138600 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_80] (rows=138600 width=689) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] + Select Operator [SEL_79] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + PartitionCols:_col1 + Group By Operator [GBY_77] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_16] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_57] (rows=18334631 width=577) + Conds:RS_12._col1=RS_68._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_68] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=138600 width=581) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter 
Operator [FIL_66] (rows=138600 width=581) + predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_56] (rows=61115434 width=70) + Conds:RS_76._col0=RS_60._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_60] + PartitionCols:_col0 + Select Operator [SEL_59] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_58] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] + PartitionCols:_col0 + Select Operator [SEL_75] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_74] (rows=550076554 width=114) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_73] + Group By Operator [GBY_72] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 
[CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_69] (rows=138600 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_67] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_65] + Group By Operator [GBY_64] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_63] + Group By Operator [GBY_62] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_61] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_59] + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out new file mode 100644 index 0000000000..972454ef4e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out @@ -0,0 +1,197 @@ +PREHOOK: query: explain +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + 
,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### 
+Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_126] + Limit [LIM_125] (rows=100 width=590) + Number of rows:100 + Select Operator [SEL_124] (rows=644925 width=590) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + Select Operator [SEL_122] (rows=644925 width=590) + Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Group By Operator [GBY_121] (rows=644925 width=406) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_26] (rows=1289851 width=406) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col3, _col2, _col11 + Top N Key Operator [TNK_54] (rows=2579702 width=386) + keys:_col3, _col2, _col11,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_98] (rows=2579702 width=386) + Conds:RS_22._col1=RS_120._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_119] (rows=60 width=102) + 
Output:["_col0","_col1"] + TableScan [TS_17] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_97] (rows=2579702 width=289) + Conds:RS_19._col0=RS_108._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_107] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_106] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_14] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=15681873 width=295) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_96] (rows=15681873 width=295) + Conds:RS_10._col3=RS_118._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col8","_col10","_col12"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + PartitionCols:_col0 + Select Operator [SEL_117] (rows=27 width=188) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_95] (rows=15681873 width=114) + Conds:RS_116._col2=RS_100._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_100] + PartitionCols:_col0 + Select Operator [SEL_99] (rows=1 width=88) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=1 width=88) + 
default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_type"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + PartitionCols:_col2 + Select Operator [SEL_115] (rows=282273729 width=35) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_114] (rows=282273729 width=19) + predicate:((cs_ship_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_8_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_8_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_8_ship_mode_sm_ship_mode_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=19) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_107] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_103] + Group By Operator [GBY_102] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_101] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_99] + diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out index 5b77c8e4e4..a3de5c6945 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out @@ -74,102 +74,102 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_161] - Limit [LIM_160] (rows=100 width=860) + Limit [LIM_160] (rows=100 width=100) Number of rows:100 - Select Operator [SEL_159] (rows=32266667 width=860) + Select Operator [SEL_159] (rows=816091 width=100) Output:["_col0"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_50] - Select Operator [SEL_49] (rows=32266667 width=860) + Select Operator [SEL_49] (rows=816091 width=100) Output:["_col0"] - Filter Operator [FIL_48] (rows=32266667 width=860) + Filter Operator [FIL_48] (rows=816091 width=324) predicate:(_col2 > _col7) - Merge Join Operator [MERGEJOIN_134] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_134] (rows=2448274 width=324) Conds:RS_45._col1=RS_158._col1(Inner),Output:["_col2","_col6","_col7"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_132] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_132] (rows=2369298 width=213) Conds:RS_42._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_152] (rows=80000000 width=860) + Select Operator [SEL_152] (rows=80000000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=80000000 
width=860) + Filter Operator [FIL_151] (rows=80000000 width=104) predicate:c_customer_sk is not null - TableScan [TS_17] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] + TableScan [TS_17] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_131] (rows=34842647 width=77) + Merge Join Operator [MERGEJOIN_131] (rows=2369298 width=114) Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_149] (rows=852 width=1910) + Select Operator [SEL_149] (rows=35 width=90) Output:["_col0"] - Filter Operator [FIL_148] (rows=852 width=1910) + Filter Operator [FIL_148] (rows=35 width=90) predicate:((s_state = 'NM') and s_store_sk is not null) - TableScan [TS_14] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + TableScan [TS_14] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] vectorized SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_146] (rows=31675133 width=77) + Select Operator [SEL_146] (rows=14291868 width=119) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_145] (rows=31675133 width=77) + Group By Operator [GBY_145] (rows=14291868 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=63350266 width=77) + Group By Operator [GBY_10] (rows=16855704 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_130] (rows=63350266 width=77) + Merge Join Operator [MERGEJOIN_130] 
(rows=16855704 width=107) Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_137] (rows=57591150 width=77) + Select Operator [SEL_137] (rows=51757026 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_135] (rows=57591150 width=77) + Filter Operator [FIL_135] (rows=51757026 width=119) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + TableScan [TS_0] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_142] (rows=36524 width=1119) + Select Operator [SEL_142] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_141] (rows=36524 width=1119) + Filter Operator [FIL_141] (rows=652 width=8) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_158] PartitionCols:_col1 - Select Operator [SEL_157] (rows=15837566 width=77) + Select Operator [SEL_157] (rows=31 width=115) Output:["_col0","_col1"] - Group By Operator [GBY_156] (rows=15837566 width=77) + Group By Operator [GBY_156] (rows=31 width=123) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_155] (rows=31675133 width=77) + Select Operator [SEL_155] (rows=14291868 width=119) Output:["_col1","_col2"] - Group 
By Operator [GBY_154] (rows=31675133 width=77) + Group By Operator [GBY_154] (rows=14291868 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=63350266 width=77) + Group By Operator [GBY_30] (rows=17467258 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_133] (rows=63350266 width=77) + Merge Join Operator [MERGEJOIN_133] (rows=17467258 width=107) Conds:RS_140._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_138] (rows=57591150 width=77) + Select Operator [SEL_138] (rows=53634860 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_136] (rows=57591150 width=77) + Filter Operator [FIL_136] (rows=53634860 width=119) predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) Please refer to the previous TableScan [TS_0] <-Map 10 [SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index 9a2a4bbec3..d049b2f28b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -133,213 +133,249 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 10 <- Reducer 13 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 20 <- Reducer 16 (BROADCAST_EDGE) -Map 21 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 12 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) 
+Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_225] - Limit [LIM_224] (rows=100 width=88) + Reducer 8 vectorized + File Output Operator [FS_237] + Limit [LIM_236] (rows=1 width=419) Number of rows:100 - Select Operator [SEL_223] (rows=1045432122 width=88) + Select Operator [SEL_235] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] - Select Operator [SEL_221] (rows=1045432122 width=88) + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + Select Operator [SEL_233] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_220] (rows=1045432122 width=88) + Group By Operator [GBY_232] (rows=1 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_63] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Group By Operator [GBY_62] (rows=2090864244 width=88) + Group By Operator [GBY_66] (rows=1 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Top N Key Operator [TNK_103] (rows=2090864244 width=88) + Top N Key Operator [TNK_103] (rows=58 width=379) keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 - Select Operator [SEL_61] (rows=2090864244 width=88) + Select Operator [SEL_65] (rows=58 width=379) Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_60] (rows=2090864244 
width=88) + Filter Operator [FIL_64] (rows=58 width=379) predicate:(_col15 is not null or _col17 is not null) - Merge Join Operator [MERGEJOIN_173] (rows=2090864244 width=88) - Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_211._col0(Left Outer),RS_55._col0=RS_219._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] - <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_181] (rows=58 width=379) + Conds:RS_61._col0=RS_231._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_169] (rows=96800003 width=860) - Conds:RS_50._col1=RS_182._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_182] + Merge Join Operator [MERGEJOIN_180] (rows=58 width=379) + Conds:RS_58._col0=RS_221._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_58] PartitionCols:_col0 - Select Operator [SEL_181] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_180] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_168] (rows=88000001 width=860) - Conds:RS_176._col2=RS_179._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_176] - PartitionCols:_col2 - Select Operator [SEL_175] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_174] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_179] + Merge Join Operator [MERGEJOIN_179] (rows=7792 width=375) + Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col0 - Select Operator [SEL_178] (rows=40000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_177] (rows=40000000 width=1014) - predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Group By Operator [GBY_54] (rows=633595212 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_170] (rows=633595212 width=88) - Conds:RS_203._col0=RS_185._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_185] - PartitionCols:_col0 - Select Operator [SEL_184] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_183] (rows=36524 width=1119) - predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] 
+ Merge Join Operator [MERGEJOIN_175] (rows=3914656 width=375) + Conds:RS_50._col1=RS_190._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=1861800 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_188] (rows=1861800 width=375) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=375) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_174] (rows=3860070 width=5) + Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_184] + PartitionCols:_col2 + Select Operator [SEL_183] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_182] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=2000000 width=102) + Output:["_col0"] + Filter Operator [FIL_185] (rows=2000000 width=102) + predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=102) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE 
[RS_56] PartitionCols:_col0 - Select Operator [SEL_202] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_201] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_198] - Group By Operator [GBY_197] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_194] - Group By Operator [GBY_191] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_186] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_184] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_200] - Group By Operator [GBY_199] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_136] - Group By Operator [GBY_135] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] - Select Operator [SEL_134] (rows=96800003 width=860) - Output:["_col0"] - Please 
refer to the previous Merge Join Operator [MERGEJOIN_169] - <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_211] + Group By Operator [GBY_54] (rows=155827 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=57825495 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_176] (rows=57825495 width=2) + Conds:RS_211._col0=RS_193._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_193] + PartitionCols:_col0 + Select Operator [SEL_192] (rows=201 width=12) + Output:["_col0"] + Filter Operator [FIL_191] (rows=201 width=12) + predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_211] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_209] (rows=525327388 width=7) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_208] + Group By Operator [GBY_207] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"] + Select Operator [SEL_134] (rows=3914656 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_175] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_194] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_221] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=155827 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_219] (rows=155827 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=155827 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=15843227 width=3) + Conds:RS_218._col0=RS_195._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_195] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_192] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + PartitionCols:_col0 + Select Operator [SEL_217] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_216] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_58_c_c_customer_sk_min) AND DynamicValue(RS_58_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_58_c_c_customer_sk_bloom_filter))) and 
(ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_215] + Group By Operator [GBY_214] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_149] (rows=7792 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_203] + Group By Operator [GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_231] PartitionCols:_col0 - Select Operator [SEL_210] (rows=79201469 width=135) + Select Operator [SEL_230] (rows=154725 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_209] (rows=79201469 width=135) + Group By Operator [GBY_229] (rows=154725 width=3) 
Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Group By Operator [GBY_29] (rows=158402938 width=135) + Group By Operator [GBY_43] (rows=154725 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_171] (rows=158402938 width=135) - Conds:RS_208._col0=RS_187._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_187] + Merge Join Operator [MERGEJOIN_178] (rows=31162251 width=3) + Conds:RS_228._col0=RS_197._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_197] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_184] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_208] + Please refer to the previous Select Operator [SEL_192] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_207] (rows=144002668 width=135) + Select Operator [SEL_227] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_206] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_205] - Group By Operator [GBY_204] (rows=1 width=12) + Filter Operator [FIL_226] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_61_c_c_customer_sk_min) AND DynamicValue(RS_61_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_61_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN 
DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_33] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] - Group By Operator [GBY_192] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_204] + Group By Operator [GBY_201] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_188] (rows=36524 width=1119) + Select Operator [SEL_198] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_184] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_219] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=158394413 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_217] (rows=158394413 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col0 - Group By Operator [GBY_43] (rows=316788826 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_172] (rows=316788826 width=135) - Conds:RS_216._col0=RS_189._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_189] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_184] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] - PartitionCols:_col0 - Select Operator [SEL_215] 
(rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_33] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_192] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_196] - Group By Operator [GBY_193] (rows=1 width=12) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_190] (rows=36524 width=1119) + Select Operator [SEL_164] (rows=58 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_184] + Please refer to the previous Merge Join Operator [MERGEJOIN_180] diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index 2aa09d48f4..2f453f3ab0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -159,291 +159,279 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 21 (BROADCAST_EDGE) -Map 11 <- Reducer 23 (BROADCAST_EDGE) -Map 15 <- Reducer 20 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) -Map 7 <- Reducer 22 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 17 <- Map 24 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 23 (BROADCAST_EDGE) +Map 13 <- Reducer 25 (BROADCAST_EDGE) +Map 17 <- Reducer 22 (BROADCAST_EDGE) +Map 9 <- Reducer 24 (BROADCAST_EDGE) +Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 
(SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 19 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_360] - Limit [LIM_359] (rows=100 width=88) + Reducer 8 vectorized + File Output Operator [FS_358] + Limit [LIM_357] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_358] (rows=574987679 width=88) + Select Operator [SEL_356] (rows=12248093 width=85) Output:["_col0"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_93] - Select Operator [SEL_92] (rows=574987679 width=88) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_97] + Select Operator [SEL_96] (rows=12248093 width=85) Output:["_col0"] - Filter Operator [FIL_91] (rows=574987679 width=88) + Filter Operator [FIL_95] (rows=12248093 width=533) predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_283] (rows=1149975359 width=88) - Conds:RS_325._col0=RS_337._col0(Inner),RS_337._col0=RS_347._col0(Inner),RS_337._col0=RS_357._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=533) + Conds:RS_92._col2=RS_355._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_355] PartitionCols:_col0 - Select Operator [SEL_336] (rows=116159124 width=88) - Output:["_col0","_col1"] - Filter 
Operator [FIL_335] (rows=116159124 width=88) - predicate:(_col7 > 0) - Select Operator [SEL_334] (rows=348477374 width=88) - Output:["_col0","_col7"] - Group By Operator [GBY_333] (rows=348477374 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_38] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_36] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_278] (rows=696954748 width=88) - Conds:RS_33._col1=RS_316._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] - PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_311] (rows=80000000 width=860) - predicate:(c_customer_id is not null and c_customer_sk is not null) - TableScan [TS_71] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=88) - Conds:RS_332._col0=RS_294._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - PartitionCols:_col0 - Select Operator [SEL_288] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_285] (rows=36524 
width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_68] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] - PartitionCols:_col0 - Select Operator [SEL_331] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_330] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_34_customer_c_customer_sk_min) AND DynamicValue(RS_34_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_34_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_21] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_295] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 24 
[CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - Group By Operator [GBY_320] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_317] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_312] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] - PartitionCols:_col0 - Select Operator [SEL_346] (rows=29040539 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_345] (rows=29040539 width=135) - predicate:(_col7 > 0) - Select Operator [SEL_344] (rows=87121617 width=135) - Output:["_col0","_col7"] - Group By Operator [GBY_343] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_60] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_58] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_280] (rows=174243235 width=135) - Conds:RS_55._col1=RS_318._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_279] (rows=158402938 width=135) - Conds:RS_342._col0=RS_296._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] - PartitionCols:_col0 - Select Operator 
[SEL_289] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_286] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_68] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] - PartitionCols:_col0 - Select Operator [SEL_341] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_340] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_43] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_339] - Group By Operator [GBY_338] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_297] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - PartitionCols:_col0 - Select Operator [SEL_356] (rows=348477374 width=88) + Select Operator [SEL_354] (rows=80000000 width=297) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_355] (rows=348477374 width=88) + Group By Operator [GBY_353] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, 
KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_83] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_82] (rows=696954748 width=88) + Group By Operator [GBY_82] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_80] (rows=696954748 width=88) + Select Operator [SEL_80] (rows=187573258 width=847) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_282] (rows=696954748 width=88) - Conds:RS_77._col1=RS_313._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] + Merge Join Operator [MERGEJOIN_288] (rows=187573258 width=847) + Conds:RS_77._col1=RS_321._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 16 [SIMPLE_EDGE] + Select Operator [SEL_320] (rows=80000000 width=656) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_319] (rows=80000000 width=656) + predicate:(c_customer_id is not null and c_customer_sk is not null) + TableScan [TS_71] (rows=80000000 width=656) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_281] (rows=633595212 width=88) - Conds:RS_354._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + Merge Join 
Operator [MERGEJOIN_287] (rows=187573258 width=199) + Conds:RS_352._col0=RS_298._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_287] (rows=36524 width=1119) + Select Operator [SEL_295] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_284] (rows=36524 width=1119) + Filter Operator [FIL_292] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_68] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] + TableScan [TS_68] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_352] PartitionCols:_col0 - Select Operator [SEL_353] (rows=575995635 width=88) + Select Operator [SEL_351] (rows=525327388 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_352] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_78_customer_c_customer_sk_min) AND DynamicValue(RS_78_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_78_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_65] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_350] (rows=525327388 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_65] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized BROADCAST [RS_349] Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - Group By Operator [GBY_298] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_287] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_351] - Group By Operator [GBY_350] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] - Group By Operator [GBY_319] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_314] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_312] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] - PartitionCols:_col0 - Select Operator [SEL_324] (rows=87121617 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_323] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - 
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_17] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_15] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_276] (rows=174243235 width=135) - Conds:RS_12._col1=RS_315._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=158402938 width=135) - Conds:RS_310._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_287] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - PartitionCols:_col0 - Select Operator [SEL_309] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_308] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_307] - Group By Operator [GBY_306] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] - Group By Operator [GBY_299] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_310] + Group By Operator [GBY_306] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=36524 width=1119) + Select Operator [SEL_299] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_287] + Please refer to the previous Select Operator [SEL_295] + <-Reducer 6 [ONE_TO_ONE_EDGE] + FORWARD [RS_92] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_290] (rows=20485011 width=436) + Conds:RS_89._col2=RS_347._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] + PartitionCols:_col0 + Select Operator [SEL_346] (rows=17130654 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_345] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_344] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_343] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_60] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_58] (rows=51391963 width=875) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_286] (rows=51391963 width=875) + 
Conds:RS_55._col1=RS_324._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_320] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_285] (rows=51391963 width=227) + Conds:RS_342._col0=RS_304._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + PartitionCols:_col0 + Select Operator [SEL_297] (rows=652 width=8) + Output:["_col0"] + Filter Operator [FIL_294] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_68] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] + PartitionCols:_col0 + Select Operator [SEL_341] (rows=143930993 width=231) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_340] (rows=143930993 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_43] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_339] + Group By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_313] + Group By Operator [GBY_309] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select 
Operator [SEL_305] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_297] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_89] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_289] (rows=31888273 width=324) + Conds:RS_327._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_335] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_334] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_333] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_38] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_36] (rows=187573258 width=847) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_284] (rows=187573258 width=847) + Conds:RS_33._col1=RS_323._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_320] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_283] (rows=187573258 width=199) + Conds:RS_332._col0=RS_302._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_302] + PartitionCols:_col0 + Select Operator [SEL_296] 
(rows=652 width=8) + Output:["_col0"] + Filter Operator [FIL_293] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_68] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=525327388 width=221) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_330] (rows=525327388 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_21] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + Group By Operator [GBY_308] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_303] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_296] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + PartitionCols:_col0 + Select Operator [SEL_326] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_325] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, 
_col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_17] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_15] (rows=51391963 width=875) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=875) + Conds:RS_12._col1=RS_322._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_320] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_281] (rows=51391963 width=227) + Conds:RS_318._col0=RS_300._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_300] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_295] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0 + Select Operator [SEL_317] (rows=143930993 width=231) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_316] (rows=143930993 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_315] + Group By Operator [GBY_314] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_295] diff --git a/ql/src/test/results/clientpositive/perf/tez/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/query12.q.out index 911b4d10fe..75156be9fd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query12.q.out @@ -88,62 +88,62 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_86] - Limit [LIM_85] (rows=100 width=135) + Limit [LIM_85] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_84] (rows=87121617 width=135) + Select Operator [SEL_84] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_83] - Select Operator [SEL_82] (rows=87121617 width=135) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_81] (rows=87121617 width=135) + PTF Operator [PTF_81] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_80] (rows=87121617 width=135) + Select Operator [SEL_80] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_79] PartitionCols:_col1 - Group By Operator [GBY_78] (rows=87121617 width=135) + Group By Operator [GBY_78] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 3 [SIMPLE_EDGE] 
SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=174243235 width=135) + Group By Operator [GBY_16] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_58] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_58] (rows=4798568 width=689) Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_69] PartitionCols:_col0 - Select Operator [SEL_68] (rows=462000 width=1436) + Select Operator [SEL_68] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_67] (rows=462000 width=1436) + Filter Operator [FIL_67] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_57] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_57] (rows=15995224 width=115) Conds:RS_77._col0=RS_61._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_60] (rows=8116 width=1119) + Select Operator [SEL_60] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_59] (rows=8116 width=1119) + Filter Operator [FIL_59] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] 
(rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_77] PartitionCols:_col0 - Select Operator [SEL_76] (rows=144002668 width=135) + Select Operator [SEL_76] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_75] (rows=144002668 width=135) + Filter Operator [FIL_75] (rows=143966864 width=119) predicate:((ws_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_74] Group By Operator [GBY_73] (rows=1 width=12) @@ -152,7 +152,7 @@ Stage-0 SHUFFLE [RS_72] Group By Operator [GBY_71] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_70] (rows=462000 width=1436) + Select Operator [SEL_70] (rows=138600 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_68] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -163,7 +163,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_64] Group By Operator [GBY_63] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_62] (rows=8116 width=1119) + Select Operator [SEL_62] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_60] diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out index 53edb5e94d..0e02e9aaa1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -115,17 +115,17 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -133,7 +133,7 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_162] - Select Operator [SEL_161] (rows=1 width=256) + Select Operator 
[SEL_161] (rows=1 width=344) Output:["_col0","_col1","_col2","_col3"] Group By Operator [GBY_160] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] @@ -141,130 +141,130 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_37] Group By Operator [GBY_36] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"] - Select Operator [SEL_35] (rows=1431552 width=88) + Select Operator [SEL_35] (rows=4851 width=183) Output:["_col6","_col8","_col9"] - Filter Operator [FIL_34] (rows=1431552 width=88) + Filter Operator [FIL_34] (rows=4851 width=183) predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) - Merge Join Operator [MERGEJOIN_121] (rows=34357287 width=88) - Conds:RS_31._col2=RS_148._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] + Merge Join Operator [MERGEJOIN_121] (rows=58239 width=183) + Conds:RS_31._col2=RS_151._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_147] (rows=1861800 width=385) + Select Operator [SEL_150] (rows=265971 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_146] (rows=1861800 width=385) + Filter Operator [FIL_149] (rows=265971 width=183) predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) - 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + TableScan [TS_15] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col2 - Filter Operator [FIL_30] (rows=31233897 width=88) + Filter Operator [FIL_30] (rows=58239 width=90) predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_120] (rows=93701693 width=88) - Conds:RS_27._col4=RS_159._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + Merge Join Operator [MERGEJOIN_120] (rows=291204 width=90) + Conds:RS_27._col4=RS_143._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_158] (rows=20000000 width=1014) + Select Operator [SEL_142] (rows=3529412 width=187) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=20000000 width=1014) + Filter Operator [FIL_141] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + TableScan [TS_12] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_119] 
(rows=85183356 width=88) - Conds:RS_24._col3=RS_140._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + Merge Join Operator [MERGEJOIN_119] (rows=3300311 width=145) + Conds:RS_24._col3=RS_135._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_139] (rows=7200 width=107) + Select Operator [SEL_134] (rows=1309 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=7200 width=107) + Filter Operator [FIL_133] (rows=1309 width=8) predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + TableScan [TS_9] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_118] (rows=77439413 width=88) - Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] + Merge Join Operator [MERGEJOIN_118] (rows=18152968 width=405) + Conds:RS_21._col1=RS_127._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] PartitionCols:_col0 - Select Operator [SEL_131] (rows=36524 width=1119) + Select Operator [SEL_126] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_130] (rows=36524 width=1119) + Filter Operator [FIL_125] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan 
[TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=70399465 width=88) - Conds:RS_124._col0=RS_156._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_117] (rows=50840141 width=446) + Conds:RS_124._col0=RS_159._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_123] (rows=1704 width=1910) + Select Operator [SEL_123] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_122] (rows=1704 width=1910) + Filter Operator [FIL_122] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + TableScan [TS_0] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] PartitionCols:_col4 - Select Operator [SEL_155] (rows=63999513 width=88) + Select Operator [SEL_158] (rows=50840141 width=450) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_154] (rows=63999513 width=88) - predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and 
(ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) + Filter Operator [FIL_157] (rows=50840141 width=450) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_28_customer_address_ca_address_sk_min) AND DynamicValue(RS_28_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_28_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or 
ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=450) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=36524 width=1119) + Select Operator [SEL_128] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_126] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_140] + Group By Operator [GBY_139] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=7200 width=107) + Select Operator [SEL_136] (rows=1309 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_153] - Group By Operator [GBY_152] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_149] (rows=1861800 width=385) + Please refer to the previous Select Operator [SEL_134] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_148] + Group By Operator [GBY_147] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3529412)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3529412)"] + Select Operator [SEL_144] (rows=3529412 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_147] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_142] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_156] + Group By Operator 
[GBY_155] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=1704 width=1910) + Select Operator [SEL_152] (rows=265971 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Please refer to the previous Select Operator [SEL_150] diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index 228897c41c..c078c271ec 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -1,6 +1,9 @@ -Warning: Shuffle Join MERGEJOIN[1455][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[1468][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 12' is a cross product -Warning: Shuffle Join MERGEJOIN[1481][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[1446][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1458][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1448][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1471][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1450][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle 
Join MERGEJOIN[1484][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -223,17 +226,18 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 108 (BROADCAST_EDGE) -Map 100 <- Reducer 103 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) -Map 110 <- Reducer 105 (BROADCAST_EDGE), Reducer 74 (BROADCAST_EDGE) -Map 111 <- Reducer 107 (BROADCAST_EDGE), Reducer 77 (BROADCAST_EDGE) -Map 112 <- Reducer 58 (BROADCAST_EDGE), Reducer 81 (BROADCAST_EDGE), Reducer 98 (BROADCAST_EDGE) -Map 113 <- Reducer 62 (BROADCAST_EDGE), Reducer 91 (BROADCAST_EDGE), Reducer 99 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 33 <- Reducer 38 (BROADCAST_EDGE) -Map 43 <- Reducer 109 (BROADCAST_EDGE) -Map 47 <- Reducer 26 (BROADCAST_EDGE) -Map 48 <- Reducer 40 (BROADCAST_EDGE) -Map 49 <- Reducer 54 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE), Reducer 78 (BROADCAST_EDGE) +Map 100 <- Reducer 103 (BROADCAST_EDGE) +Map 110 <- Reducer 105 (BROADCAST_EDGE) +Map 111 <- Reducer 107 (BROADCAST_EDGE) +Map 112 <- Reducer 63 (BROADCAST_EDGE), Reducer 83 (BROADCAST_EDGE) +Map 113 <- Reducer 68 (BROADCAST_EDGE), Reducer 93 (BROADCAST_EDGE) +Map 20 <- Reducer 25 (BROADCAST_EDGE) +Map 36 <- Reducer 41 (BROADCAST_EDGE) +Map 46 <- Reducer 109 (BROADCAST_EDGE) +Map 50 <- Reducer 29 (BROADCAST_EDGE) +Map 51 <- Reducer 43 (BROADCAST_EDGE) +Map 52 <- Reducer 58 (BROADCAST_EDGE), Reducer 72 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE), Union 11 (CONTAINS) Reducer 101 <- Map 100 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE) Reducer 103 <- Map 102 (CUSTOM_SIMPLE_EDGE) Reducer 104 <- Map 102 (SIMPLE_EDGE), Map 110 (SIMPLE_EDGE) @@ -242,1225 +246,1185 @@ Reducer 106 <- Map 102 (SIMPLE_EDGE), Map 111 (SIMPLE_EDGE) Reducer 107 <- Map 102 (CUSTOM_SIMPLE_EDGE) Reducer 108 <- Map 102 (CUSTOM_SIMPLE_EDGE) Reducer 109 <- Map 102 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Union 10 
(CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 57 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 13 <- Map 1 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE), Union 14 (CONTAINS) -Reducer 15 <- Union 14 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 61 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 12 <- Union 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 62 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 17 <- Union 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 67 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 20 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE), Union 14 (CONTAINS) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 25 <- Union 24 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 21 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Union 28 (CUSTOM_SIMPLE_EDGE) -Reducer 30 <- Map 21 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 11 (CONTAINS) +Reducer 23 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 24 (SIMPLE_EDGE), Map 50 
(SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 28 <- Union 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 24 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE), Union 31 (CONTAINS) Reducer 32 <- Union 31 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 35 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE), Union 10 (CONTAINS) -Reducer 36 <- Map 33 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE), Union 14 (CONTAINS) -Reducer 38 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 37 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE), Union 24 (CONTAINS) +Reducer 33 <- Map 24 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 35 <- Union 34 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 38 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 11 (CONTAINS) +Reducer 39 <- Map 36 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE), Union 16 (CONTAINS) Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 40 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 41 <- Map 37 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 42 <- Map 37 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 44 <- Map 102 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 45 <- Map 102 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 46 <- Map 102 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 5 <- Reducer 25 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 52 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) -Reducer 51 <- Map 63 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE), Reducer 65 (ONE_TO_ONE_EDGE) -Reducer 52 <- Reducer 51 (SIMPLE_EDGE) -Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 112 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) -Reducer 56 <- Map 63 (SIMPLE_EDGE), Reducer 55 (SIMPLE_EDGE), Reducer 80 (ONE_TO_ONE_EDGE) -Reducer 57 <- Reducer 56 
(SIMPLE_EDGE) -Reducer 58 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 113 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) -Reducer 60 <- Map 63 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE), Reducer 90 (ONE_TO_ONE_EDGE) -Reducer 61 <- Reducer 60 (SIMPLE_EDGE) -Reducer 62 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 64 <- Map 63 (SIMPLE_EDGE), Reducer 70 (ONE_TO_ONE_EDGE) -Reducer 65 <- Reducer 64 (SIMPLE_EDGE) -Reducer 66 <- Reducer 65 (CUSTOM_SIMPLE_EDGE) -Reducer 67 <- Map 63 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) -Reducer 68 <- Reducer 67 (SIMPLE_EDGE), Union 69 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Reducer 70 <- Union 69 (SIMPLE_EDGE) -Reducer 71 <- Map 63 (CUSTOM_SIMPLE_EDGE) -Reducer 72 <- Map 63 (SIMPLE_EDGE), Reducer 104 (SIMPLE_EDGE) -Reducer 73 <- Reducer 72 (SIMPLE_EDGE), Union 69 (CONTAINS) -Reducer 74 <- Map 63 (CUSTOM_SIMPLE_EDGE) -Reducer 75 <- Map 63 (SIMPLE_EDGE), Reducer 106 (SIMPLE_EDGE) -Reducer 76 <- Reducer 75 (SIMPLE_EDGE), Union 69 (CONTAINS) -Reducer 77 <- Map 63 (CUSTOM_SIMPLE_EDGE) -Reducer 78 <- Map 63 (CUSTOM_SIMPLE_EDGE) -Reducer 79 <- Map 63 (SIMPLE_EDGE), Reducer 85 (ONE_TO_ONE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 80 <- Reducer 79 (SIMPLE_EDGE) -Reducer 81 <- Reducer 80 (CUSTOM_SIMPLE_EDGE) -Reducer 82 <- Map 63 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) -Reducer 83 <- Reducer 82 (SIMPLE_EDGE), Union 84 (CONTAINS) -Reducer 85 <- Union 84 (SIMPLE_EDGE) -Reducer 86 <- Reducer 82 (SIMPLE_EDGE), Union 87 (CONTAINS) -Reducer 88 <- Union 87 (SIMPLE_EDGE) -Reducer 89 <- Map 63 (SIMPLE_EDGE), Reducer 88 (ONE_TO_ONE_EDGE) -Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 102 (SIMPLE_EDGE), Union 10 (CONTAINS) -Reducer 90 <- Reducer 89 (SIMPLE_EDGE) -Reducer 91 <- Reducer 90 (CUSTOM_SIMPLE_EDGE) -Reducer 92 <- Map 63 (SIMPLE_EDGE), Reducer 104 (SIMPLE_EDGE) -Reducer 93 <- Reducer 92 (SIMPLE_EDGE), Union 84 (CONTAINS) -Reducer 94 <- Reducer 92 (SIMPLE_EDGE), Union 87 (CONTAINS) -Reducer 95 <- Map 63 (SIMPLE_EDGE), Reducer 106 (SIMPLE_EDGE) 
-Reducer 96 <- Reducer 95 (SIMPLE_EDGE), Union 84 (CONTAINS) -Reducer 97 <- Reducer 95 (SIMPLE_EDGE), Union 87 (CONTAINS) -Reducer 98 <- Map 63 (CUSTOM_SIMPLE_EDGE) -Reducer 99 <- Map 63 (CUSTOM_SIMPLE_EDGE) +Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 45 <- Map 40 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 47 <- Map 102 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 48 <- Map 102 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 49 <- Map 102 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 5 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 54 <- Map 69 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 55 <- Reducer 54 (ONE_TO_ONE_EDGE), Reducer 71 (ONE_TO_ONE_EDGE) +Reducer 56 <- Reducer 55 (SIMPLE_EDGE) +Reducer 58 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 59 <- Map 112 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 60 <- Map 69 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE) +Reducer 61 <- Reducer 60 (ONE_TO_ONE_EDGE), Reducer 82 (ONE_TO_ONE_EDGE) +Reducer 62 <- Reducer 61 (SIMPLE_EDGE) +Reducer 63 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 113 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 65 <- Map 69 (SIMPLE_EDGE), Reducer 64 (SIMPLE_EDGE) +Reducer 66 <- Reducer 65 (ONE_TO_ONE_EDGE), Reducer 92 (ONE_TO_ONE_EDGE) +Reducer 67 <- Reducer 66 (SIMPLE_EDGE) +Reducer 68 <- Map 57 (CUSTOM_SIMPLE_EDGE) +Reducer 70 <- Map 69 (SIMPLE_EDGE), Reducer 76 (ONE_TO_ONE_EDGE) +Reducer 71 <- Reducer 70 (SIMPLE_EDGE) +Reducer 72 <- Reducer 71 (CUSTOM_SIMPLE_EDGE) +Reducer 73 <- Map 69 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) 
+Reducer 74 <- Reducer 73 (SIMPLE_EDGE), Union 75 (CONTAINS) +Reducer 76 <- Union 75 (SIMPLE_EDGE) +Reducer 77 <- Map 69 (SIMPLE_EDGE), Reducer 104 (SIMPLE_EDGE) +Reducer 78 <- Reducer 77 (SIMPLE_EDGE), Union 75 (CONTAINS) +Reducer 79 <- Map 69 (SIMPLE_EDGE), Reducer 106 (SIMPLE_EDGE) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 80 <- Reducer 79 (SIMPLE_EDGE), Union 75 (CONTAINS) +Reducer 81 <- Map 69 (SIMPLE_EDGE), Reducer 87 (ONE_TO_ONE_EDGE) +Reducer 82 <- Reducer 81 (SIMPLE_EDGE) +Reducer 83 <- Reducer 82 (CUSTOM_SIMPLE_EDGE) +Reducer 84 <- Map 69 (SIMPLE_EDGE), Reducer 101 (SIMPLE_EDGE) +Reducer 85 <- Reducer 84 (SIMPLE_EDGE), Union 86 (CONTAINS) +Reducer 87 <- Union 86 (SIMPLE_EDGE) +Reducer 88 <- Reducer 84 (SIMPLE_EDGE), Union 89 (CONTAINS) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 90 <- Union 89 (SIMPLE_EDGE) +Reducer 91 <- Map 69 (SIMPLE_EDGE), Reducer 90 (ONE_TO_ONE_EDGE) +Reducer 92 <- Reducer 91 (SIMPLE_EDGE) +Reducer 93 <- Reducer 92 (CUSTOM_SIMPLE_EDGE) +Reducer 94 <- Map 69 (SIMPLE_EDGE), Reducer 104 (SIMPLE_EDGE) +Reducer 95 <- Reducer 94 (SIMPLE_EDGE), Union 86 (CONTAINS) +Reducer 96 <- Reducer 94 (SIMPLE_EDGE), Union 89 (CONTAINS) +Reducer 97 <- Map 69 (SIMPLE_EDGE), Reducer 106 (SIMPLE_EDGE) +Reducer 98 <- Reducer 97 (SIMPLE_EDGE), Union 86 (CONTAINS) +Reducer 99 <- Reducer 97 (SIMPLE_EDGE), Union 89 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_1743] - Limit [LIM_1742] (rows=100 width=237) + Reducer 9 vectorized + File Output Operator [FS_1726] + Limit [LIM_1725] (rows=7 width=192) Number of rows:100 - Select Operator [SEL_1741] (rows=1016388080 width=237) + Select Operator [SEL_1724] (rows=7 width=192) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1740] - Select Operator [SEL_1739] (rows=1016388080 width=237) + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1723] + Select Operator [SEL_1722] (rows=7 width=192) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1738] (rows=1016388080 width=237) + Group By Operator [GBY_1721] (rows=7 width=200) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_1474] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] + Reduce Output Operator [RS_1477] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1473] (rows=2032776160 width=237) + Group By Operator [GBY_1476] (rows=7 width=200) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1472] (rows=406555232 width=237) + Top N Key Operator [TNK_1475] (rows=3 width=221) keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 - Select Operator [SEL_1470] (rows=116155905 width=264) + Select Operator [SEL_1473] (rows=1 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1469] (rows=116155905 width=264) + Filter Operator [FIL_1472] (rows=1 width=244) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_1468] (rows=348467716 width=264) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1749] - Select Operator [SEL_1748] (rows=1 width=8) - Filter Operator [FIL_1747] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_1746] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1745] (rows=1 width=8) - Group By Operator [GBY_1744] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Union 10 [CUSTOM_SIMPLE_EDGE] - <-Reducer 19 [CONTAINS] - Reduce Output Operator [RS_1499] - Group By Operator 
[GBY_1498] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1497] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1495] (rows=316788826 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1494] (rows=316788826 width=135) - Conds:RS_1821._col0=RS_1808._col0(Inner),Output:["_col1"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1808] - PartitionCols:_col0 - Select Operator [SEL_1803] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_1802] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) - TableScan [TS_13] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1821] - PartitionCols:_col0 - Select Operator [SEL_1819] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_1818] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_10] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_quantity"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1817] - Group By Operator [GBY_1816] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1814] - Group By Operator [GBY_1812] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1805] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1803] - <-Reducer 
35 [CONTAINS] - Reduce Output Operator [RS_1535] - Group By Operator [GBY_1534] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1533] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1531] (rows=158402938 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1530] (rows=158402938 width=135) - Conds:RS_1849._col0=RS_1836._col0(Inner),Output:["_col1"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1836] - PartitionCols:_col0 - Select Operator [SEL_1831] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_1830] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) - TableScan [TS_24] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1849] - PartitionCols:_col0 - Select Operator [SEL_1847] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_1846] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_21] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_quantity"] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1845] - Group By Operator [GBY_1844] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1842] - Group By Operator [GBY_1840] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1833] (rows=73049 width=1119) - Output:["_col0"] - Please 
refer to the previous Select Operator [SEL_1831] - <-Reducer 9 [CONTAINS] - Reduce Output Operator [RS_1467] - Group By Operator [GBY_1466] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1465] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1463] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1462] (rows=633595212 width=88) - Conds:RS_1645._col0=RS_1626._col0(Inner),Output:["_col1"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1626] - PartitionCols:_col0 - Select Operator [SEL_1615] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_1614] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_year BETWEEN 1999 AND 2001) - TableScan [TS_97] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1645] - PartitionCols:_col0 - Select Operator [SEL_1643] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_1642] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_quantity"] - <-Reducer 108 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1641] - Group By Operator [GBY_1640] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1638] - Group By Operator [GBY_1633] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1623] 
(rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1615] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1752] - Select Operator [SEL_1751] (rows=1 width=120) - Output:["_col0"] - Group By Operator [GBY_1750] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 28 [CUSTOM_SIMPLE_EDGE] - <-Reducer 27 [CONTAINS] - Reduce Output Operator [RS_1517] - Group By Operator [GBY_1516] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1515] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1513] (rows=316788826 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1512] (rows=316788826 width=135) - Conds:RS_1828._col0=RS_1809._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1809] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1803] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1828] - PartitionCols:_col0 - Select Operator [SEL_1826] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1825] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_62_date_dim_d_date_sk_min) AND DynamicValue(RS_62_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_62_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_55] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1824] - Group By Operator [GBY_1823] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1815] - Group By 
Operator [GBY_1813] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1807] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1803] - <-Reducer 41 [CONTAINS] - Reduce Output Operator [RS_1553] - Group By Operator [GBY_1552] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1551] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1549] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1548] (rows=158402938 width=135) - Conds:RS_1856._col0=RS_1837._col0(Inner),Output:["_col1","_col2"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1837] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1831] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1856] - PartitionCols:_col0 - Select Operator [SEL_1854] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1853] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_66] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] - <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1852] - Group By Operator [GBY_1851] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1843] - Group By Operator [GBY_1841] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_1835] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1831] - <-Reducer 45 [CONTAINS] - Reduce Output Operator [RS_1571] - Group By Operator [GBY_1570] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1569] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1567] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1566] (rows=633595212 width=88) - Conds:RS_1863._col0=RS_1627._col0(Inner),Output:["_col1","_col2"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1627] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1863] - PartitionCols:_col0 - Select Operator [SEL_1861] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1860] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_45] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] - <-Reducer 109 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1859] - Group By Operator [GBY_1858] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1639] - Group By Operator [GBY_1634] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1625] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous 
Select Operator [SEL_1615] - <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1772] - Group By Operator [GBY_1771] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_1471] (rows=1 width=244) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_381] + Merge Join Operator [MERGEJOIN_1448] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1732] + Select Operator [SEL_1731] (rows=1 width=8) + Filter Operator [FIL_1730] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1729] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1728] (rows=1 width=8) + Group By Operator [GBY_1727] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 11 [CUSTOM_SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] + Reduce Output Operator [RS_1470] + Group By Operator [GBY_1469] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1468] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1466] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1465] (rows=14736682 width=0) + Conds:RS_1648._col0=RS_1629._col0(Inner),Output:["_col1"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1629] + PartitionCols:_col0 + Select Operator [SEL_1618] (rows=1957 width=8) + Output:["_col0"] + Filter Operator [FIL_1617] (rows=1957 width=8) + predicate:(d_date_sk is not null and d_year BETWEEN 1999 AND 2001) + TableScan [TS_97] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1648] + PartitionCols:_col0 + Select Operator [SEL_1646] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1645] (rows=550076554 width=7) + predicate:((ss_sold_date_sk 
BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity"] + <-Reducer 108 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1644] + Group By Operator [GBY_1643] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1641] + Group By Operator [GBY_1636] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1626] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1618] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_1502] + Group By Operator [GBY_1501] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1500] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1498] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1497] (rows=7676736 width=3) + Conds:RS_1800._col0=RS_1787._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1787] + PartitionCols:_col0 + Select Operator [SEL_1782] (rows=1957 width=8) + Output:["_col0"] + Filter Operator [FIL_1781] (rows=1957 width=8) + predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) + TableScan [TS_13] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1800] + PartitionCols:_col0 + Select Operator [SEL_1798] (rows=286549727 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1797] (rows=286549727 
width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_10] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1796] + Group By Operator [GBY_1795] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1793] + Group By Operator [GBY_1791] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1784] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1782] + <-Reducer 38 [CONTAINS] + Reduce Output Operator [RS_1538] + Group By Operator [GBY_1537] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1536] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1534] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1533] (rows=3856907 width=3) + Conds:RS_1828._col0=RS_1815._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1815] + PartitionCols:_col0 + Select Operator [SEL_1810] (rows=1957 width=8) + Output:["_col0"] + Filter Operator [FIL_1809] (rows=1957 width=8) + predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) + TableScan [TS_24] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1828] + PartitionCols:_col0 + Select Operator [SEL_1826] (rows=143966864 width=7) + Output:["_col0","_col1"] + 
Filter Operator [FIL_1825] (rows=143966864 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_21] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity"] + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1824] + Group By Operator [GBY_1823] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1821] + Group By Operator [GBY_1819] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1812] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1810] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1735] + Select Operator [SEL_1734] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1733] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 31 [CUSTOM_SIMPLE_EDGE] + <-Reducer 30 [CONTAINS] + Reduce Output Operator [RS_1520] + Group By Operator [GBY_1519] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1518] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1516] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1515] (rows=7676736 width=94) + Conds:RS_1807._col0=RS_1788._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1788] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1782] + <-Map 50 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_1807] + PartitionCols:_col0 + Select Operator [SEL_1805] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1804] (rows=286549727 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_62_date_dim_d_date_sk_min) AND DynamicValue(RS_62_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_62_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) + TableScan [TS_55] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1803] + Group By Operator [GBY_1802] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1794] + Group By Operator [GBY_1792] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1786] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1782] + <-Reducer 44 [CONTAINS] + Reduce Output Operator [RS_1556] + Group By Operator [GBY_1555] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1554] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1552] (rows=3856907 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1551] (rows=3856907 width=114) + Conds:RS_1835._col0=RS_1816._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1816] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1810] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1835] + PartitionCols:_col0 + Select Operator [SEL_1833] (rows=143966864 width=119) + 
Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1832] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + TableScan [TS_66] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1831] + Group By Operator [GBY_1830] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1822] + Group By Operator [GBY_1820] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1814] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1810] + <-Reducer 48 [CONTAINS] + Reduce Output Operator [RS_1574] + Group By Operator [GBY_1573] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1572] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1570] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1569] (rows=14736682 width=0) + Conds:RS_1842._col0=RS_1630._col0(Inner),Output:["_col1","_col2"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1630] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1618] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1842] + PartitionCols:_col0 + Select Operator [SEL_1840] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1839] (rows=550076554 width=114) + predicate:((ss_sold_date_sk BETWEEN 
DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_45] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] + <-Reducer 109 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1838] + Group By Operator [GBY_1837] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1642] + Group By Operator [GBY_1637] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1628] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1618] + <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1753] + Group By Operator [GBY_1752] (rows=1 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 56 [SIMPLE_EDGE] - SHUFFLE [RS_369] + <-Reducer 61 [SIMPLE_EDGE] + SHUFFLE [RS_375] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_368] (rows=696935432 width=135) + Group By Operator [GBY_374] (rows=1 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_366] (rows=696935432 width=135) + Select Operator [SEL_372] (rows=1 width=128) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1430] (rows=696935432 width=135) - Conds:RS_362._col1=RS_1703._col0(Inner),RS_362._col1=RS_1762._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] - <-Map 63 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_1703] - PartitionCols:_col0 - Select Operator [SEL_1687] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1678] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_91] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] - <-Reducer 80 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1762] + Merge Join Operator [MERGEJOIN_1444] (rows=1 width=128) + Conds:RS_369._col1=RS_1743._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] + <-Reducer 82 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1743] PartitionCols:_col0 - Group By Operator [GBY_1761] (rows=254100 width=1436) + Group By Operator [GBY_1742] (rows=362 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 79 [SIMPLE_EDGE] - SHUFFLE [RS_356] + <-Reducer 81 [SIMPLE_EDGE] + SHUFFLE [RS_360] PartitionCols:_col0 - Group By Operator [GBY_355] (rows=508200 width=1436) + Group By Operator [GBY_359] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1429] (rows=508200 width=1436) - Conds:RS_1699._col1, _col2, _col3=RS_1760._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1699] + Merge Join Operator [MERGEJOIN_1427] (rows=724 width=4) + Conds:RS_1698._col1, _col2, _col3=RS_1741._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1698] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1683] (rows=462000 width=1436) + Select Operator [SEL_1686] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1674] (rows=462000 width=1436) + Filter Operator [FIL_1677] (rows=458612 width=15) predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_91] - <-Reducer 85 
[ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1760] + TableScan [TS_91] (rows=462000 width=15) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] + <-Reducer 87 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1741] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1759] (rows=1 width=108) + Select Operator [SEL_1740] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1758] (rows=1 width=108) + Filter Operator [FIL_1739] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1757] (rows=304916424 width=108) + Group By Operator [GBY_1738] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 84 [SIMPLE_EDGE] - <-Reducer 83 [CONTAINS] vectorized - Reduce Output Operator [RS_1897] + <-Union 86 [SIMPLE_EDGE] + <-Reducer 85 [CONTAINS] vectorized + Reduce Output Operator [RS_1870] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1896] (rows=609832849 width=108) + Group By Operator [GBY_1869] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1895] (rows=348477374 width=88) + Group By Operator [GBY_1868] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 82 [SIMPLE_EDGE] - SHUFFLE [RS_300] + <-Reducer 84 [SIMPLE_EDGE] + SHUFFLE [RS_304] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_299] (rows=696954748 width=88) + Group By Operator [GBY_303] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1424] (rows=696954748 width=88) - Conds:RS_295._col1=RS_1700._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1700] + Merge Join Operator 
[MERGEJOIN_1422] (rows=14628613 width=11) + Conds:RS_299._col1=RS_1699._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1699] PartitionCols:_col0 - Select Operator [SEL_1684] (rows=462000 width=1436) + Select Operator [SEL_1687] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1675] (rows=462000 width=1436) + Filter Operator [FIL_1678] (rows=458612 width=15) predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_91] <-Reducer 101 [SIMPLE_EDGE] - SHUFFLE [RS_295] + SHUFFLE [RS_299] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1408] (rows=633595212 width=88) - Conds:RS_1871._col0=RS_1616._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1406] (rows=14736682 width=4) + Conds:RS_1848._col0=RS_1619._col0(Inner),Output:["_col1"] <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1616] + SHUFFLE [RS_1619] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] + Please refer to the previous Select Operator [SEL_1618] <-Map 100 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1871] + SHUFFLE [RS_1848] PartitionCols:_col0 - Select Operator [SEL_1870] (rows=575995635 width=88) + Select Operator [SEL_1847] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1869] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_107_iss_i_item_sk_min) AND DynamicValue(RS_107_iss_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_107_iss_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_104_d1_d_date_sk_min) AND DynamicValue(RS_104_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_104_d1_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_94] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] + Filter Operator [FIL_1846] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_104_d1_d_date_sk_min) AND DynamicValue(RS_104_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_104_d1_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_94] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 103 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1866] - Group By Operator [GBY_1865] (rows=1 width=12) + BROADCAST [RS_1845] + Group By Operator [GBY_1844] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1635] - Group By Operator [GBY_1630] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1617] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1615] - <-Reducer 71 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1868] - Group By Operator [GBY_1867] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1714] - Group By Operator [GBY_1708] (rows=1 width=12) + SHUFFLE [RS_1638] + Group By Operator [GBY_1633] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1692] (rows=462000 width=1436) + Select Operator [SEL_1620] (rows=1957 width=4) Output:["_col0"] - Select Operator [SEL_1681] (rows=462000 width=1436) - 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1672] (rows=462000 width=1436) - predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_91] - <-Reducer 93 [CONTAINS] vectorized - Reduce Output Operator [RS_1903] + Please refer to the previous Select Operator [SEL_1618] + <-Reducer 95 [CONTAINS] vectorized + Reduce Output Operator [RS_1876] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1902] (rows=609832849 width=108) + Group By Operator [GBY_1875] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1901] (rows=174233858 width=135) + Group By Operator [GBY_1874] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 92 [SIMPLE_EDGE] - SHUFFLE [RS_320] + <-Reducer 94 [SIMPLE_EDGE] + SHUFFLE [RS_324] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_319] (rows=348467716 width=135) + Group By Operator [GBY_323] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1426] (rows=348467716 width=135) - Conds:RS_315._col1=RS_1701._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1701] + Merge Join Operator [MERGEJOIN_1424] (rows=7620440 width=11) + Conds:RS_319._col1=RS_1700._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1700] PartitionCols:_col0 - Select Operator [SEL_1685] (rows=462000 width=1436) + Select Operator [SEL_1688] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1676] (rows=462000 width=1436) + Filter Operator [FIL_1679] (rows=458612 width=15) predicate:(i_brand_id is not null and i_category_id is 
not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_91] <-Reducer 104 [SIMPLE_EDGE] - SHUFFLE [RS_315] + SHUFFLE [RS_319] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1410] (rows=316788826 width=135) - Conds:RS_1881._col0=RS_1618._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1408] (rows=7676736 width=4) + Conds:RS_1856._col0=RS_1621._col0(Inner),Output:["_col1"] <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1618] + SHUFFLE [RS_1621] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] + Please refer to the previous Select Operator [SEL_1618] <-Map 110 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1881] + SHUFFLE [RS_1856] PartitionCols:_col0 - Select Operator [SEL_1880] (rows=287989836 width=135) + Select Operator [SEL_1855] (rows=286549727 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1879] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_127_ics_i_item_sk_min) AND DynamicValue(RS_127_ics_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_127_ics_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_124_d2_d_date_sk_min) AND DynamicValue(RS_124_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_124_d2_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_114] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk"] + Filter Operator [FIL_1854] (rows=286549727 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_124_d2_d_date_sk_min) AND DynamicValue(RS_124_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_124_d2_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_114] (rows=287989836 width=7) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] <-Reducer 105 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1876] - Group By Operator [GBY_1875] (rows=1 width=12) + BROADCAST [RS_1853] + Group By Operator [GBY_1852] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1636] - Group By Operator [GBY_1631] (rows=1 width=12) + SHUFFLE [RS_1639] + Group By Operator [GBY_1634] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1619] (rows=73049 width=1119) + Select Operator [SEL_1622] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1615] - <-Reducer 74 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1878] - Group By Operator [GBY_1877] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1715] - Group By Operator [GBY_1709] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1694] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1681] - <-Reducer 96 [CONTAINS] vectorized - Reduce Output Operator [RS_1909] + Please refer to the previous Select Operator [SEL_1618] + <-Reducer 98 [CONTAINS] vectorized + Reduce Output Operator [RS_1882] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1908] (rows=609832849 width=108) + Group By Operator [GBY_1881] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By 
Operator [GBY_1907] (rows=87121617 width=135) + Group By Operator [GBY_1880] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 95 [SIMPLE_EDGE] - SHUFFLE [RS_341] + <-Reducer 97 [SIMPLE_EDGE] + SHUFFLE [RS_345] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_340] (rows=174243235 width=135) + Group By Operator [GBY_344] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1428] (rows=174243235 width=135) - Conds:RS_336._col1=RS_1702._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1702] + Merge Join Operator [MERGEJOIN_1426] (rows=3828623 width=11) + Conds:RS_340._col1=RS_1701._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1701] PartitionCols:_col0 - Select Operator [SEL_1686] (rows=462000 width=1436) + Select Operator [SEL_1689] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1677] (rows=462000 width=1436) + Filter Operator [FIL_1680] (rows=458612 width=15) predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_91] <-Reducer 106 [SIMPLE_EDGE] - SHUFFLE [RS_336] + SHUFFLE [RS_340] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1412] (rows=158402938 width=135) - Conds:RS_1891._col0=RS_1620._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1410] (rows=3856907 width=4) + Conds:RS_1864._col0=RS_1623._col0(Inner),Output:["_col1"] <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1620] + SHUFFLE [RS_1623] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] + Please refer to the previous Select Operator [SEL_1618] <-Map 111 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1891] + SHUFFLE 
[RS_1864] PartitionCols:_col0 - Select Operator [SEL_1890] (rows=144002668 width=135) + Select Operator [SEL_1863] (rows=143966864 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1889] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_148_iws_i_item_sk_min) AND DynamicValue(RS_148_iws_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_148_iws_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_145_d3_d_date_sk_min) AND DynamicValue(RS_145_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_145_d3_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_135] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk"] + Filter Operator [FIL_1862] (rows=143966864 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_145_d3_d_date_sk_min) AND DynamicValue(RS_145_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_145_d3_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_135] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] <-Reducer 107 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1886] - Group By Operator [GBY_1885] (rows=1 width=12) + BROADCAST [RS_1861] + Group By Operator [GBY_1860] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 102 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1637] - Group By Operator [GBY_1632] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1621] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1615] - <-Reducer 77 [BROADCAST_EDGE] 
vectorized - BROADCAST [RS_1888] - Group By Operator [GBY_1887] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1716] - Group By Operator [GBY_1710] (rows=1 width=12) + SHUFFLE [RS_1640] + Group By Operator [GBY_1635] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1696] (rows=462000 width=1436) + Select Operator [SEL_1624] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1681] - <-Reducer 55 [SIMPLE_EDGE] - SHUFFLE [RS_362] + Please refer to the previous Select Operator [SEL_1618] + <-Reducer 60 [ONE_TO_ONE_EDGE] + FORWARD [RS_369] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1422] (rows=316788826 width=135) - Conds:RS_1770._col0=RS_1660._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1660] + Merge Join Operator [MERGEJOIN_1420] (rows=7790806 width=110) + Conds:RS_366._col1=RS_1702._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1702] PartitionCols:_col0 - Select Operator [SEL_1657] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_1656] (rows=18262 width=1119) - predicate:((d_moy = 11) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_85] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 112 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1770] - PartitionCols:_col0 - Select Operator [SEL_1769] (rows=287989836 width=135) + Select Operator [SEL_1690] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1768] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN 
DynamicValue(RS_363_item_i_item_sk_min) AND DynamicValue(RS_363_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_363_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_364_item_i_item_sk_min) AND DynamicValue(RS_364_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_364_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_360_date_dim_d_date_sk_min) AND DynamicValue(RS_360_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_360_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_271] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 58 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1754] - Group By Operator [GBY_1753] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1668] - Group By Operator [GBY_1665] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1661] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1657] - <-Reducer 81 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1767] - Group By Operator [GBY_1766] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 80 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1765] - Group By Operator [GBY_1764] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1763] (rows=254100 
width=1436) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_1761] - <-Reducer 98 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1756] - Group By Operator [GBY_1755] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1718] - Group By Operator [GBY_1712] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1704] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1687] - <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1487] + Filter Operator [FIL_1681] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_91] + <-Reducer 59 [SIMPLE_EDGE] + SHUFFLE [RS_366] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1419] (rows=7790806 width=98) + Conds:RS_1751._col0=RS_1663._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1663] + PartitionCols:_col0 + Select Operator [SEL_1660] (rows=50 width=12) + Output:["_col0"] + Filter Operator [FIL_1659] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 2000) and d_date_sk is not null) + TableScan [TS_85] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 112 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1751] + PartitionCols:_col0 + Select Operator [SEL_1750] (rows=286549727 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1749] (rows=286549727 width=123) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_370_item_i_item_sk_min) AND DynamicValue(RS_370_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_370_item_i_item_sk_bloom_filter))) and 
(cs_sold_date_sk BETWEEN DynamicValue(RS_364_date_dim_d_date_sk_min) AND DynamicValue(RS_364_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_364_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_275] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1737] + Group By Operator [GBY_1736] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1671] + Group By Operator [GBY_1668] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1664] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1660] + <-Reducer 83 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1748] + Group By Operator [GBY_1747] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 82 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1746] + Group By Operator [GBY_1745] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1744] (rows=362 width=4) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_1742] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_1490] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1486] (rows=2032776160 width=237) + Group By Operator [GBY_1489] (rows=7 width=200) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1485] (rows=406555232 width=237) + Top N Key Operator [TNK_1488] (rows=3 width=221) keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 - Select Operator [SEL_1483] (rows=58081078 width=264) + Select Operator [SEL_1486] (rows=1 width=219) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1482] (rows=58081078 width=264) + Filter Operator [FIL_1485] (rows=1 width=244) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_1481] (rows=174243235 width=264) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1778] - Select Operator [SEL_1777] (rows=1 width=8) - Filter Operator [FIL_1776] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_1775] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1774] (rows=1 width=8) - Group By Operator [GBY_1773] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Union 14 [CUSTOM_SIMPLE_EDGE] - <-Reducer 13 [CONTAINS] - Reduce Output Operator [RS_1480] - Group By Operator [GBY_1479] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1478] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1476] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1475] (rows=633595212 width=88) - Conds:RS_1646._col0=RS_1628._col0(Inner),Output:["_col1"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1628] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1646] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1643] - <-Reducer 20 [CONTAINS] - Reduce Output Operator 
[RS_1505] - Group By Operator [GBY_1504] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1503] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1501] (rows=316788826 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1500] (rows=316788826 width=135) - Conds:RS_1822._col0=RS_1810._col0(Inner),Output:["_col1"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1810] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1803] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1822] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1819] - <-Reducer 36 [CONTAINS] - Reduce Output Operator [RS_1541] - Group By Operator [GBY_1540] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1539] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1537] (rows=158402938 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1536] (rows=158402938 width=135) - Conds:RS_1850._col0=RS_1838._col0(Inner),Output:["_col1"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1838] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1831] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1850] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1847] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1781] - Select Operator [SEL_1780] (rows=1 width=120) - Output:["_col0"] - Group By Operator [GBY_1779] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 31 [CUSTOM_SIMPLE_EDGE] - <-Reducer 30 [CONTAINS] - Reduce Output Operator [RS_1523] - Group By Operator [GBY_1522] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1521] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1519] (rows=316788826 width=135) - 
Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1518] (rows=316788826 width=135) - Conds:RS_1829._col0=RS_1811._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1811] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1803] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1829] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1826] - <-Reducer 42 [CONTAINS] - Reduce Output Operator [RS_1559] - Group By Operator [GBY_1558] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1557] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1555] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1554] (rows=158402938 width=135) - Conds:RS_1857._col0=RS_1839._col0(Inner),Output:["_col1","_col2"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1839] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1831] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1857] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1854] - <-Reducer 46 [CONTAINS] - Reduce Output Operator [RS_1577] - Group By Operator [GBY_1576] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1575] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1573] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1572] (rows=633595212 width=88) - Conds:RS_1864._col0=RS_1629._col0(Inner),Output:["_col1","_col2"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1629] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1864] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1861] - <-Reducer 61 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_1801] - Group By Operator [GBY_1800] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_1484] (rows=1 width=244) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_575] + Merge Join Operator [MERGEJOIN_1450] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1759] + Select Operator [SEL_1758] (rows=1 width=8) + Filter Operator [FIL_1757] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1756] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1755] (rows=1 width=8) + Group By Operator [GBY_1754] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 16 [CUSTOM_SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] + Reduce Output Operator [RS_1483] + Group By Operator [GBY_1482] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1481] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1479] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1478] (rows=14736682 width=0) + Conds:RS_1649._col0=RS_1631._col0(Inner),Output:["_col1"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1631] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1618] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1649] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1646] + <-Reducer 23 [CONTAINS] + Reduce Output Operator [RS_1508] + Group By Operator [GBY_1507] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1506] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1504] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1503] (rows=7676736 width=3) + Conds:RS_1801._col0=RS_1789._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_1789] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1782] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1801] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1798] + <-Reducer 39 [CONTAINS] + Reduce Output Operator [RS_1544] + Group By Operator [GBY_1543] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1542] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1540] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1539] (rows=3856907 width=3) + Conds:RS_1829._col0=RS_1817._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1817] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1810] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1829] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1826] + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1762] + Select Operator [SEL_1761] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1760] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 34 [CUSTOM_SIMPLE_EDGE] + <-Reducer 33 [CONTAINS] + Reduce Output Operator [RS_1526] + Group By Operator [GBY_1525] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1524] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1522] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1521] (rows=7676736 width=94) + Conds:RS_1808._col0=RS_1790._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1790] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1782] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1808] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1805] + 
<-Reducer 45 [CONTAINS] + Reduce Output Operator [RS_1562] + Group By Operator [GBY_1561] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1560] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1558] (rows=3856907 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1557] (rows=3856907 width=114) + Conds:RS_1836._col0=RS_1818._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1818] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1810] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1836] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1833] + <-Reducer 49 [CONTAINS] + Reduce Output Operator [RS_1580] + Group By Operator [GBY_1579] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1578] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1576] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1575] (rows=14736682 width=0) + Conds:RS_1843._col0=RS_1632._col0(Inner),Output:["_col1","_col2"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1632] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1618] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1843] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1840] + <-Reducer 67 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1780] + Group By Operator [GBY_1779] (rows=1 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 60 [SIMPLE_EDGE] - SHUFFLE [RS_559] + <-Reducer 66 [SIMPLE_EDGE] + SHUFFLE [RS_569] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_558] (rows=348486471 width=135) + Group By Operator [GBY_568] (rows=1 width=132) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_556] (rows=348486471 width=135) + Select Operator [SEL_566] (rows=1 width=128) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1445] (rows=348486471 width=135) - Conds:RS_552._col1=RS_1706._col0(Inner),RS_552._col1=RS_1791._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1706] - PartitionCols:_col0 - Select Operator [SEL_1689] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1680] (rows=462000 width=1436) - predicate:i_item_sk is not null - Please refer to the previous TableScan [TS_91] - <-Reducer 90 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1791] + Merge Join Operator [MERGEJOIN_1445] (rows=1 width=128) + Conds:RS_563._col1=RS_1770._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] + <-Reducer 92 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1770] PartitionCols:_col0 - Group By Operator [GBY_1790] (rows=254100 width=1436) + Group By Operator [GBY_1769] (rows=362 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 89 [SIMPLE_EDGE] - SHUFFLE [RS_546] + <-Reducer 91 [SIMPLE_EDGE] + SHUFFLE [RS_554] PartitionCols:_col0 - Group By Operator [GBY_545] (rows=508200 width=1436) + Group By Operator [GBY_553] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1444] (rows=508200 width=1436) - Conds:RS_1705._col1, _col2, _col3=RS_1789._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1705] + Merge Join Operator [MERGEJOIN_1442] (rows=724 width=4) + Conds:RS_1703._col1, _col2, _col3=RS_1768._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1703] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1688] (rows=462000 
width=1436) + Select Operator [SEL_1691] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1679] (rows=462000 width=1436) + Filter Operator [FIL_1682] (rows=458612 width=15) predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_91] - <-Reducer 88 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1789] + <-Reducer 90 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1768] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1788] (rows=1 width=108) + Select Operator [SEL_1767] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1787] (rows=1 width=108) + Filter Operator [FIL_1766] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1786] (rows=304916424 width=108) + Group By Operator [GBY_1765] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 87 [SIMPLE_EDGE] - <-Reducer 86 [CONTAINS] vectorized - Reduce Output Operator [RS_1900] + <-Union 89 [SIMPLE_EDGE] + <-Reducer 88 [CONTAINS] vectorized + Reduce Output Operator [RS_1873] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1899] (rows=609832849 width=108) + Group By Operator [GBY_1872] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1898] (rows=348477374 width=88) + Group By Operator [GBY_1871] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 82 [SIMPLE_EDGE] - SHUFFLE [RS_490] + <-Reducer 84 [SIMPLE_EDGE] + SHUFFLE [RS_498] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_299] - <-Reducer 94 [CONTAINS] vectorized - Reduce Output Operator [RS_1906] + Please refer to the previous Group By Operator [GBY_303] + 
<-Reducer 96 [CONTAINS] vectorized + Reduce Output Operator [RS_1879] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1905] (rows=609832849 width=108) + Group By Operator [GBY_1878] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1904] (rows=174233858 width=135) + Group By Operator [GBY_1877] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 92 [SIMPLE_EDGE] - SHUFFLE [RS_510] + <-Reducer 94 [SIMPLE_EDGE] + SHUFFLE [RS_518] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_319] - <-Reducer 97 [CONTAINS] vectorized - Reduce Output Operator [RS_1912] + Please refer to the previous Group By Operator [GBY_323] + <-Reducer 99 [CONTAINS] vectorized + Reduce Output Operator [RS_1885] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1911] (rows=609832849 width=108) + Group By Operator [GBY_1884] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1910] (rows=87121617 width=135) + Group By Operator [GBY_1883] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 95 [SIMPLE_EDGE] - SHUFFLE [RS_531] + <-Reducer 97 [SIMPLE_EDGE] + SHUFFLE [RS_539] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_340] - <-Reducer 59 [SIMPLE_EDGE] - SHUFFLE [RS_552] + Please refer to the previous Group By Operator [GBY_344] + <-Reducer 65 [ONE_TO_ONE_EDGE] + FORWARD [RS_563] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1437] (rows=158402938 width=135) - Conds:RS_1799._col0=RS_1662._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1662] - PartitionCols:_col0 
- Please refer to the previous Select Operator [SEL_1657] - <-Map 113 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1799] + Merge Join Operator [MERGEJOIN_1435] (rows=3942084 width=130) + Conds:RS_560._col1=RS_1704._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1704] PartitionCols:_col0 - Select Operator [SEL_1798] (rows=144002668 width=135) + Select Operator [SEL_1692] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1797] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_553_item_i_item_sk_min) AND DynamicValue(RS_553_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_553_item_i_item_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_554_item_i_item_sk_min) AND DynamicValue(RS_554_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_554_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_550_date_dim_d_date_sk_min) AND DynamicValue(RS_550_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_550_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_461] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] - <-Reducer 62 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1783] - Group By Operator [GBY_1782] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1669] - Group By Operator [GBY_1666] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1663] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the 
previous Select Operator [SEL_1657] - <-Reducer 91 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1796] - Group By Operator [GBY_1795] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 90 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1794] - Group By Operator [GBY_1793] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1792] (rows=254100 width=1436) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_1790] - <-Reducer 99 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1785] - Group By Operator [GBY_1784] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1719] - Group By Operator [GBY_1713] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1707] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1689] - <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_1461] + Filter Operator [FIL_1683] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_91] + <-Reducer 64 [SIMPLE_EDGE] + SHUFFLE [RS_560] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1434] (rows=3942084 width=118) + Conds:RS_1778._col0=RS_1665._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1665] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1660] + <-Map 113 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1778] + PartitionCols:_col0 + Select Operator [SEL_1777] (rows=143966864 
width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1776] (rows=143966864 width=123) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_564_item_i_item_sk_min) AND DynamicValue(RS_564_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_564_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_558_date_dim_d_date_sk_min) AND DynamicValue(RS_558_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_558_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_469] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] + <-Reducer 68 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1764] + Group By Operator [GBY_1763] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1672] + Group By Operator [GBY_1669] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1666] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1660] + <-Reducer 93 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1775] + Group By Operator [GBY_1774] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 92 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1773] + Group By Operator [GBY_1772] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1771] (rows=362 width=4) + Output:["_col0"] + Please refer to the previous Group 
By Operator [GBY_1769] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_1464] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1460] (rows=2032776160 width=237) + Group By Operator [GBY_1463] (rows=7 width=200) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1459] (rows=406555232 width=237) + Top N Key Operator [TNK_1462] (rows=3 width=221) keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100 - Select Operator [SEL_1457] (rows=232318249 width=217) + Select Operator [SEL_1460] (rows=1 width=221) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1456] (rows=232318249 width=217) + Filter Operator [FIL_1459] (rows=1 width=244) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_1455] (rows=696954748 width=217) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1655] - Select Operator [SEL_1654] (rows=1 width=120) - Output:["_col0"] - Group By Operator [GBY_1653] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 24 [CUSTOM_SIMPLE_EDGE] - <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_1511] - Group By Operator [GBY_1510] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1509] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1507] (rows=316788826 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1506] (rows=316788826 width=135) - Conds:RS_1827._col0=RS_1806._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1806] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1803] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1827] - PartitionCols:_col0 - 
Please refer to the previous Select Operator [SEL_1826] - <-Reducer 39 [CONTAINS] - Reduce Output Operator [RS_1547] - Group By Operator [GBY_1546] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1545] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1543] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1542] (rows=158402938 width=135) - Conds:RS_1855._col0=RS_1834._col0(Inner),Output:["_col1","_col2"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1834] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1831] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1855] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1854] - <-Reducer 44 [CONTAINS] - Reduce Output Operator [RS_1565] - Group By Operator [GBY_1564] (rows=1 width=120) - Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1563] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1561] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1560] (rows=633595212 width=88) - Conds:RS_1862._col0=RS_1624._col0(Inner),Output:["_col1","_col2"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1624] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1862] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1861] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1652] - Select Operator [SEL_1651] (rows=1 width=8) - Filter Operator [FIL_1650] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_1649] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1648] (rows=1 width=8) - Group By Operator [GBY_1647] (rows=1 width=8) - 
Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Union 3 [CUSTOM_SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] - Reduce Output Operator [RS_1493] - Group By Operator [GBY_1492] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1491] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1489] (rows=316788826 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1488] (rows=316788826 width=135) - Conds:RS_1820._col0=RS_1804._col0(Inner),Output:["_col1"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1804] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1803] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1820] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1819] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_1454] - Group By Operator [GBY_1453] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1452] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1450] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1449] (rows=633595212 width=88) - Conds:RS_1644._col0=RS_1622._col0(Inner),Output:["_col1"] - <-Map 102 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1622] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1615] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1644] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1643] - <-Reducer 34 [CONTAINS] - Reduce Output Operator [RS_1529] - Group By Operator [GBY_1528] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_1527] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_1525] (rows=158402938 width=135) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_1524] (rows=158402938 width=135) - Conds:RS_1848._col0=RS_1832._col0(Inner),Output:["_col1"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1832] - 
PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1831] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1848] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1847] - <-Reducer 52 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1737] - Group By Operator [GBY_1736] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_1458] (rows=1 width=244) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_188] + Merge Join Operator [MERGEJOIN_1446] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1658] + Select Operator [SEL_1657] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_1656] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 27 [CUSTOM_SIMPLE_EDGE] + <-Reducer 26 [CONTAINS] + Reduce Output Operator [RS_1514] + Group By Operator [GBY_1513] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1512] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1510] (rows=7676736 width=94) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1509] (rows=7676736 width=94) + Conds:RS_1806._col0=RS_1785._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1785] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1782] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1806] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1805] + <-Reducer 42 [CONTAINS] + Reduce Output Operator [RS_1550] + Group By Operator [GBY_1549] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1548] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1546] (rows=3856907 
width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1545] (rows=3856907 width=114) + Conds:RS_1834._col0=RS_1813._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1813] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1810] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1834] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1833] + <-Reducer 47 [CONTAINS] + Reduce Output Operator [RS_1568] + Group By Operator [GBY_1567] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_1566] (rows=26270325 width=44) + Output:["_col0"] + Select Operator [SEL_1564] (rows=14736682 width=0) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_1563] (rows=14736682 width=0) + Conds:RS_1841._col0=RS_1627._col0(Inner),Output:["_col1","_col2"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1627] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1618] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1841] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1840] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1655] + Select Operator [SEL_1654] (rows=1 width=8) + Filter Operator [FIL_1653] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_1652] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1651] (rows=1 width=8) + Group By Operator [GBY_1650] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Union 3 [CUSTOM_SIMPLE_EDGE] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_1457] + Group By Operator [GBY_1456] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1455] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1453] (rows=14736682 width=0) + Output:["_col0"] + Merge Join Operator 
[MERGEJOIN_1452] (rows=14736682 width=0) + Conds:RS_1647._col0=RS_1625._col0(Inner),Output:["_col1"] + <-Map 102 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1625] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1618] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1647] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1646] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_1496] + Group By Operator [GBY_1495] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1494] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1492] (rows=7676736 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1491] (rows=7676736 width=3) + Conds:RS_1799._col0=RS_1783._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1783] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1782] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1799] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1798] + <-Reducer 37 [CONTAINS] + Reduce Output Operator [RS_1532] + Group By Operator [GBY_1531] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_1530] (rows=26270325 width=1) + Output:["_col0"] + Select Operator [SEL_1528] (rows=3856907 width=3) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_1527] (rows=3856907 width=3) + Conds:RS_1827._col0=RS_1811._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1811] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1810] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1827] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1826] + <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1720] + Group By Operator [GBY_1719] (rows=1 width=132) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 51 [SIMPLE_EDGE] - SHUFFLE [RS_180] + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_182] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_179] (rows=1393909496 width=88) + Group By Operator [GBY_181] (rows=1 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_177] (rows=1393909496 width=88) + Select Operator [SEL_179] (rows=1 width=128) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1415] (rows=1393909496 width=88) - Conds:RS_173._col1=RS_1697._col0(Inner),RS_173._col1=RS_1727._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1697] - PartitionCols:_col0 - Select Operator [SEL_1682] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1673] (rows=462000 width=1436) - predicate:i_item_sk is not null - Please refer to the previous TableScan [TS_91] - <-Reducer 65 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1727] + Merge Join Operator [MERGEJOIN_1443] (rows=1 width=128) + Conds:RS_176._col1=RS_1710._col0(Inner),Output:["_col2","_col3","_col8","_col9","_col10"] + <-Reducer 71 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1710] PartitionCols:_col0 - Group By Operator [GBY_1726] (rows=254100 width=1436) + Group By Operator [GBY_1709] (rows=362 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 64 [SIMPLE_EDGE] + <-Reducer 70 [SIMPLE_EDGE] SHUFFLE [RS_167] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=508200 width=1436) + Group By Operator [GBY_166] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1414] (rows=508200 width=1436) - Conds:RS_1690._col1, _col2, _col3=RS_1725._col0, _col1, _col2(Inner),Output:["_col0"] - 
<-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1690] + Merge Join Operator [MERGEJOIN_1412] (rows=724 width=4) + Conds:RS_1693._col1, _col2, _col3=RS_1708._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1693] PartitionCols:_col1, _col2, _col3 - Please refer to the previous Select Operator [SEL_1681] - <-Reducer 70 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1725] + Select Operator [SEL_1684] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1675] (rows=458612 width=15) + predicate:(i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null) + Please refer to the previous TableScan [TS_91] + <-Reducer 76 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1708] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1724] (rows=1 width=108) + Select Operator [SEL_1707] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1723] (rows=1 width=108) + Filter Operator [FIL_1706] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1722] (rows=304916424 width=108) + Group By Operator [GBY_1705] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 69 [SIMPLE_EDGE] - <-Reducer 68 [CONTAINS] vectorized - Reduce Output Operator [RS_1874] + <-Union 75 [SIMPLE_EDGE] + <-Reducer 74 [CONTAINS] vectorized + Reduce Output Operator [RS_1851] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1873] (rows=609832849 width=108) + Group By Operator [GBY_1850] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1872] (rows=348477374 width=88) + Group By Operator [GBY_1849] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 67 
[SIMPLE_EDGE] + <-Reducer 73 [SIMPLE_EDGE] SHUFFLE [RS_111] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_110] (rows=696954748 width=88) + Group By Operator [GBY_110] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1409] (rows=696954748 width=88) - Conds:RS_106._col1=RS_1691._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1691] + Merge Join Operator [MERGEJOIN_1407] (rows=14628613 width=11) + Conds:RS_106._col1=RS_1694._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1694] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1681] + Please refer to the previous Select Operator [SEL_1684] <-Reducer 101 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_1408] - <-Reducer 73 [CONTAINS] vectorized - Reduce Output Operator [RS_1884] + Please refer to the previous Merge Join Operator [MERGEJOIN_1406] + <-Reducer 78 [CONTAINS] vectorized + Reduce Output Operator [RS_1859] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1883] (rows=609832849 width=108) + Group By Operator [GBY_1858] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1882] (rows=174233858 width=135) + Group By Operator [GBY_1857] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 72 [SIMPLE_EDGE] + <-Reducer 77 [SIMPLE_EDGE] SHUFFLE [RS_131] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_130] (rows=348467716 width=135) + Group By Operator [GBY_130] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1411] 
(rows=348467716 width=135) - Conds:RS_126._col1=RS_1693._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1693] + Merge Join Operator [MERGEJOIN_1409] (rows=7620440 width=11) + Conds:RS_126._col1=RS_1695._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1695] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1681] + Please refer to the previous Select Operator [SEL_1684] <-Reducer 104 [SIMPLE_EDGE] SHUFFLE [RS_126] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_1410] - <-Reducer 76 [CONTAINS] vectorized - Reduce Output Operator [RS_1894] + Please refer to the previous Merge Join Operator [MERGEJOIN_1408] + <-Reducer 80 [CONTAINS] vectorized + Reduce Output Operator [RS_1867] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1893] (rows=609832849 width=108) + Group By Operator [GBY_1866] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1892] (rows=87121617 width=135) + Group By Operator [GBY_1865] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 75 [SIMPLE_EDGE] + <-Reducer 79 [SIMPLE_EDGE] SHUFFLE [RS_152] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_151] (rows=174243235 width=135) + Group By Operator [GBY_151] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_1413] (rows=174243235 width=135) - Conds:RS_147._col1=RS_1695._col0(Inner),Output:["_col5","_col6","_col7"] - <-Map 63 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1695] + Merge Join Operator [MERGEJOIN_1411] (rows=3828623 width=11) + Conds:RS_147._col1=RS_1696._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 69 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_1696] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1681] + Please refer to the previous Select Operator [SEL_1684] <-Reducer 106 [SIMPLE_EDGE] SHUFFLE [RS_147] PartitionCols:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_1412] - <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_173] + Please refer to the previous Merge Join Operator [MERGEJOIN_1410] + <-Reducer 54 [ONE_TO_ONE_EDGE] + FORWARD [RS_176] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1407] (rows=633595212 width=88) - Conds:RS_1735._col0=RS_1658._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1658] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1657] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1735] + Merge Join Operator [MERGEJOIN_1405] (rows=15062131 width=15) + Conds:RS_173._col1=RS_1697._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1697] PartitionCols:_col0 - Select Operator [SEL_1734] (rows=575995635 width=88) + Select Operator [SEL_1685] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1733] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_174_item_i_item_sk_min) AND DynamicValue(RS_174_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_174_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_175_item_i_item_sk_min) AND DynamicValue(RS_175_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_175_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_171_date_dim_d_date_sk_min) AND DynamicValue(RS_171_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_171_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_82] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] - <-Reducer 54 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1671] - Group By Operator [GBY_1670] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1667] - Group By Operator [GBY_1664] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1659] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1657] - <-Reducer 66 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1732] - Group By Operator [GBY_1731] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 65 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1730] - Group By Operator [GBY_1729] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1728] (rows=254100 width=1436) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_1726] - <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1721] - Group By Operator [GBY_1720] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1717] - Group By Operator [GBY_1711] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1698] (rows=462000 width=1436) - Output:["_col0"] - Please refer to 
the previous Select Operator [SEL_1682] + Filter Operator [FIL_1676] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_91] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_173] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1404] (rows=15062131 width=4) + Conds:RS_1718._col0=RS_1661._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 57 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1661] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1660] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1718] + PartitionCols:_col0 + Select Operator [SEL_1717] (rows=550076554 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1716] (rows=550076554 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_177_item_i_item_sk_min) AND DynamicValue(RS_177_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_177_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_171_date_dim_d_date_sk_min) AND DynamicValue(RS_171_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_171_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_82] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] + <-Reducer 58 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1674] + Group By Operator [GBY_1673] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1670] + Group By Operator [GBY_1667] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1662] (rows=50 width=4) + Output:["_col0"] + Please 
refer to the previous Select Operator [SEL_1660] + <-Reducer 72 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1715] + Group By Operator [GBY_1714] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 71 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1713] + Group By Operator [GBY_1712] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1711] (rows=362 width=4) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_1709] diff --git a/ql/src/test/results/clientpositive/perf/tez/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/query15.q.out index 065b061203..565f1019ea 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -63,73 +63,73 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_99] - Limit [LIM_98] (rows=100 width=135) + Limit [LIM_98] (rows=100 width=201) Number of rows:100 - Select Operator [SEL_97] (rows=174233858 width=135) + Select Operator [SEL_97] (rows=2555 width=201) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_96] - Group By Operator [GBY_95] (rows=174233858 width=135) + Group By Operator [GBY_95] (rows=2555 width=201) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 - Group By Operator [GBY_24] (rows=348467716 width=135) + Group By Operator [GBY_24] (rows=56210 width=201) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col4 - Top N Key Operator [TNK_44] (rows=348467716 width=135) + Top N Key Operator [TNK_44] (rows=20154874 width=279) keys:_col4,sort order:+,top n:100 - Select Operator [SEL_23] (rows=348467716 width=135) + Select Operator [SEL_23] (rows=20154874 
width=279) Output:["_col4","_col7"] - Filter Operator [FIL_22] (rows=348467716 width=135) + Filter Operator [FIL_22] (rows=20154874 width=279) predicate:((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) - Merge Join Operator [MERGEJOIN_77] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_77] (rows=20154874 width=279) Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_75] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_75] (rows=80000000 width=179) Conds:RS_80._col1=RS_83._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_80] PartitionCols:_col1 - Select Operator [SEL_79] (rows=80000000 width=860) + Select Operator [SEL_79] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_78] (rows=80000000 width=860) + Filter Operator [FIL_78] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_83] PartitionCols:_col0 - Select Operator [SEL_82] (rows=40000000 width=1014) + Select Operator [SEL_82] (rows=40000000 width=179) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_81] (rows=40000000 width=1014) + Filter Operator [FIL_81] (rows=40000000 width=179) predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_zip"] + TableScan [TS_3] (rows=40000000 width=179) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_76] (rows=20154874 width=107) Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_85] (rows=18262 width=1119) + Select Operator [SEL_85] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_84] (rows=18262 width=1119) + Filter Operator [FIL_84] (rows=130 width=12) predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_94] PartitionCols:_col0 - Select Operator [SEL_93] (rows=287989836 width=135) + Select Operator [SEL_93] (rows=285117831 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_92] (rows=287989836 width=135) + Filter Operator [FIL_92] (rows=285117831 width=119) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] + TableScan [TS_6] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_91] Group By 
Operator [GBY_90] (rows=1 width=12) @@ -138,7 +138,7 @@ Stage-0 SHUFFLE [RS_89] Group By Operator [GBY_88] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_87] (rows=18262 width=1119) + Select Operator [SEL_87] (rows=130 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_85] diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out index 8f554cbbac..b09d953c4b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out @@ -73,16 +73,18 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 
19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -92,150 +94,161 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=1 width=344) + File Output Operator [FS_178] + Limit [LIM_177] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_174] (rows=1 width=344) + Select Operator [SEL_176] (rows=1 width=240) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - Select Operator [SEL_172] (rows=1 width=344) + SHUFFLE [RS_175] + Select Operator [SEL_174] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_171] (rows=1 width=344) + Group By Operator [GBY_173] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_170] - Group By Operator [GBY_169] (rows=1 width=344) + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_168] (rows=231905279 width=135) + Group By Operator [GBY_170] (rows=5150256 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_73] (rows=231905279 width=135) + Group By Operator [GBY_73] (rows=5150256 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_42] (rows=231905279 width=135) + Select Operator [SEL_42] (rows=5150256 width=214) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_41] (rows=231905279 width=135) + Filter Operator [FIL_41] (rows=5150256 width=214) predicate:_col14 is null - Merge Join Operator 
[MERGEJOIN_130] (rows=463810558 width=135) - Conds:RS_38._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_167] + Merge Join Operator [MERGEJOIN_130] (rows=10300512 width=214) + Conds:RS_38._col4=RS_169._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_169] PartitionCols:_col0 - Select Operator [SEL_166] (rows=14399440 width=106) + Select Operator [SEL_168] (rows=18238808 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_165] (rows=14399440 width=106) + Group By Operator [GBY_167] (rows=18238808 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=28798881 width=106) + Group By Operator [GBY_165] (rows=28798881 width=4) Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_162] (rows=28798881 width=106) + Filter Operator [FIL_164] (rows=28798881 width=4) predicate:cr_order_number is not null - TableScan [TS_25] (rows=28798881 width=106) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] + TableScan [TS_25] (rows=28798881 width=4) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_38] PartitionCols:_col4 - Select Operator [SEL_37] (rows=421645953 width=135) + Select Operator [SEL_37] (rows=5150256 width=200) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_129] (rows=421645953 width=135) - Conds:RS_34._col4=RS_161._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] - PartitionCols:_col0 - Group By Operator [GBY_160] (rows=287989836 width=135) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_159] 
(rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=287989836 width=135) - predicate:(cs_order_number is not null and cs_warehouse_sk is not null) - TableScan [TS_22] (rows=287989836 width=135) - default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] + Merge Join Operator [MERGEJOIN_129] (rows=5150256 width=202) + Conds:RS_34._col4=RS_163._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_128] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_128] (rows=5150256 width=200) Conds:RS_18._col2=RS_149._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 14 [SIMPLE_EDGE] vectorized + <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_148] (rows=60 width=2045) + Select Operator [SEL_148] (rows=10 width=102) Output:["_col0"] - Filter Operator [FIL_147] (rows=60 width=2045) + Filter Operator [FIL_147] (rows=10 width=102) predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] + TableScan [TS_9] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_127] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_127] (rows=30901534 width=230) Conds:RS_15._col1=RS_141._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized + <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_140] (rows=20000000 width=1014) + Select 
Operator [SEL_140] (rows=784314 width=90) Output:["_col0"] - Filter Operator [FIL_139] (rows=20000000 width=1014) + Filter Operator [FIL_139] (rows=784314 width=90) predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_126] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_126] (rows=31519516 width=234) Conds:RS_157._col0=RS_133._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] vectorized + <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_133] PartitionCols:_col0 - Select Operator [SEL_132] (rows=8116 width=1119) + Select Operator [SEL_132] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_131] (rows=8116 width=1119) + Filter Operator [FIL_131] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_156] (rows=287989836 width=135) + Select Operator [SEL_156] (rows=283695062 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_155] (rows=287989836 width=135) + Filter Operator [FIL_155] (rows=283695062 width=243) predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND 
DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized + TableScan [TS_0] (rows=287989836 width=243) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized BROADCAST [RS_138] Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_136] Group By Operator [GBY_135] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_134] (rows=8116 width=1119) + Select Operator [SEL_134] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_132] - <-Reducer 13 [BROADCAST_EDGE] vectorized + <-Reducer 
14 [BROADCAST_EDGE] vectorized BROADCAST [RS_146] Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_144] Group By Operator [GBY_143] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_142] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_142] (rows=784314 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_140] - <-Reducer 15 [BROADCAST_EDGE] vectorized + <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_154] Group By Operator [GBY_153] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_152] Group By Operator [GBY_151] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_150] (rows=60 width=2045) + Select Operator [SEL_150] (rows=10 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_148] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Group By Operator [GBY_162] (rows=286548719 width=7) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_161] (rows=286548719 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=286548719 width=7) + 
predicate:((cs_order_number BETWEEN DynamicValue(RS_34_cs1_cs_order_number_min) AND DynamicValue(RS_34_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_34_cs1_cs_order_number_bloom_filter))) and cs_order_number is not null and cs_warehouse_sk is not null) + TableScan [TS_22] (rows=287989836 width=7) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=5150256 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_128] diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index 3d6e1da184..fa576c3479 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -103,20 +103,20 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 18 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -126,192 +126,189 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_270] - Limit [LIM_269] (rows=100 width=88) + File Output 
Operator [FS_263] + Limit [LIM_262] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_268] (rows=421657640 width=88) + Select Operator [SEL_261] (rows=4815969644 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_267] - Select Operator [SEL_266] (rows=421657640 width=88) + SHUFFLE [RS_260] + Select Operator [SEL_259] (rows=4815969644 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_265] (rows=421657640 width=88) + Group By Operator [GBY_258] (rows=4815969644 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_49] (rows=843315281 width=88) + Group By Operator [GBY_49] (rows=4815969644 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=843315281 width=88) + Top N Key Operator [TNK_93] (rows=4815969644 width=381) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_47] (rows=843315281 width=88) + Select Operator [SEL_47] (rows=4815969644 width=381) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_213] (rows=843315281 width=88) - Conds:RS_44._col3=RS_251._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Merge Join Operator [MERGEJOIN_213] (rows=4815969644 width=381) + Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] PartitionCols:_col0 - Select Operator [SEL_250] (rows=1704 width=1910) + Select Operator [SEL_256] (rows=1704 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=1704 width=1910) + Filter Operator [FIL_255] (rows=1704 width=90) predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + TableScan [TS_32] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_212] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_212] (rows=4815969644 width=299) Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col7, _col8, _col9 - Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_211] (rows=540026342 width=19) Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) - Conds:RS_242._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - 
PartitionCols:_col0 - Select Operator [SEL_219] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_216] (rows=73049 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - PartitionCols:_col0 - Select Operator [SEL_241] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_240] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_15] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_28] + PARTITION_ONLY_SHUFFLE [RS_28] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) - Conds:RS_264._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_209] (rows=14254135 width=11) + Conds:RS_244._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_218] (rows=73049 width=1119) + Select Operator [SEL_218] (rows=3652 width=94) Output:["_col0"] - Filter Operator [FIL_215] (rows=73049 width=1119) + Filter Operator [FIL_215] (rows=3652 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_264] + TableScan [TS_3] (rows=73049 width=94) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] 
PartitionCols:_col0 - Select Operator [SEL_263] (rows=287989836 width=135) + Select Operator [SEL_243] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_262] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + Filter Operator [FIL_242] (rows=285117831 width=15) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN 
DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_9] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_111] Group By Operator [GBY_110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_109] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=2681277 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_248] - Group By Operator [GBY_246] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_210] (rows=2681277 width=10) + Conds:RS_233._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_224] + PartitionCols:_col0 + 
Select Operator [SEL_219] (rows=3652 width=94) + Output:["_col0"] + Filter Operator [FIL_216] (rows=3652 width=94) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] + PartitionCols:_col0 + Select Operator [SEL_232] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_231] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_15] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_241] + Group By Operator [GBY_239] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_126] Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_124] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_124] (rows=2681277 width=2) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_234] (rows=462000 width=1436) - Output:["_col0"] - Select Operator [SEL_232] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_231] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_228] Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=73049 width=1119) + Select Operator [SEL_223] (rows=3652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_218] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] + PartitionCols:_col1, _col2 + Please refer to the previous Merge Join Operator [MERGEJOIN_210] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=696954748 width=88) - Conds:RS_38._col1=RS_233._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] + Merge Join Operator [MERGEJOIN_208] (rows=27749405 width=294) + Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_232] + 
Select Operator [SEL_253] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_252] (rows=462000 width=288) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_207] (rows=633595212 width=88) - Conds:RS_259._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_207] (rows=27749405 width=10) + Conds:RS_251._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_217] (rows=36524 width=1119) + Select Operator [SEL_217] (rows=101 width=94) Output:["_col0"] - Filter Operator [FIL_214] (rows=36524 width=1119) + Filter Operator [FIL_214] (rows=101 width=94) predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_258] (rows=575995635 width=88) + Select Operator [SEL_250] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_257] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and 
in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Please refer to the previous Group By Operator [GBY_243] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_247] - Please refer to the previous Group By Operator [GBY_246] + Filter Operator [FIL_249] (rows=501694138 width=23) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_28_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_28_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_28_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Please refer to the previous Group By Operator [GBY_234] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Please refer to the previous Group By Operator [GBY_237] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + BROADCAST [RS_240] + Please refer to the previous Group By Operator [GBY_239] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_246] + Group By Operator [GBY_245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=14254135 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Group By Operator [GBY_247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - 
Group By Operator [GBY_253] (rows=1 width=12) + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_252] (rows=1704 width=1910) + Select Operator [SEL_129] (rows=14254135 width=7) Output:["_col0"] - Please refer to the previous Select Operator [SEL_250] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_230] Group By Operator [GBY_229] (rows=1 width=12) @@ -320,7 +317,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_227] Group By Operator [GBY_225] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=36524 width=1119) + Select Operator [SEL_221] (rows=101 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_217] diff --git a/ql/src/test/results/clientpositive/perf/tez/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out index ed14fe0d2d..1fa1b9e3f2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query18.q.out @@ -81,165 +81,165 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 9 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE) -Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_187] - Limit [LIM_186] (rows=100 width=135) + File Output Operator [FS_184] + Limit [LIM_183] (rows=100 width=1165) Number of rows:100 - Select Operator [SEL_185] (rows=1054114882 width=135) + Select Operator [SEL_182] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] - Select Operator [SEL_183] (rows=1054114882 width=135) + SHUFFLE [RS_181] + Select Operator [SEL_180] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - 
Group By Operator [GBY_182] (rows=1054114882 width=135) + Group By Operator [GBY_179] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_42] (rows=2108229765 width=135) + Group By Operator [GBY_42] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)","sum(_col10)","count(_col10)"],keys:_col0, _col1, _col2, _col3, 0L - Select Operator [SEL_40] (rows=421645953 width=135) + Select Operator [SEL_40] (rows=2193811 width=618) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Merge Join Operator [MERGEJOIN_145] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_145] (rows=2193811 width=618) Conds:RS_37._col0=RS_38._col3(Inner),Output:["_col4","_col6","_col7","_col8","_col11","_col16","_col17","_col18","_col19","_col20","_col26"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_37] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_141] (rows=4959744 width=287) + Conds:RS_34._col1=RS_154._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"] + <-Map 
9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_154] + PartitionCols:_col0 + Select Operator [SEL_153] (rows=1861800 width=4) + Output:["_col0"] + Filter Operator [FIL_152] (rows=1861800 width=4) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=4) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_140] (rows=4890586 width=290) + Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col2 + Select Operator [SEL_147] (rows=35631408 width=19) + Output:["_col0","_col1","_col2","_col4"] + Filter Operator [FIL_146] (rows=35631408 width=19) + predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=19) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=5490196 width=285) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_149] (rows=5490196 width=285) + predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=285) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col3 - Select Operator [SEL_30] (rows=383314495 width=135) + Select Operator [SEL_30] (rows=15983481 width=529) Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col16"] - Merge Join Operator [MERGEJOIN_144] (rows=383314495 width=135) - 
Conds:RS_27._col3=RS_173._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col14","_col16"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_173] + Merge Join Operator [MERGEJOIN_144] (rows=15983481 width=529) + Conds:RS_27._col3=RS_178._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col14","_col16"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_178] PartitionCols:_col0 - Select Operator [SEL_172] (rows=462000 width=1436) + Select Operator [SEL_177] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_171] (rows=462000 width=1436) + Filter Operator [FIL_176] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_18] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_18] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_143] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_143] (rows=15983481 width=433) Conds:RS_24._col2=RS_165._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col14"] - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_164] (rows=465450 width=385) + Select Operator [SEL_164] (rows=103433 width=184) Output:["_col0","_col3"] - Filter Operator [FIL_163] (rows=465450 width=385) + Filter Operator [FIL_163] (rows=103433 width=187) predicate:((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] - <-Reducer 10 [SIMPLE_EDGE] + TableScan [TS_15] (rows=1861800 width=187) + 
default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=316788826 width=135) - Conds:RS_181._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_142] (rows=100578970 width=459) + Conds:RS_175._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_156] (rows=36524 width=1119) + Select Operator [SEL_156] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_155] (rows=36524 width=1119) + Filter Operator [FIL_155] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_181] + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_175] PartitionCols:_col0 - Select Operator [SEL_180] (rows=287989836 width=135) + Select Operator [SEL_174] (rows=283692098 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_179] (rows=287989836 width=135) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_25_cd1_cd_demo_sk_min) AND DynamicValue(RS_25_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_25_cd1_cd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN 
DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_173] (rows=283692098 width=466) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_25_cd1_cd_demo_sk_min) AND DynamicValue(RS_25_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_25_cd1_cd_demo_sk_bloom_filter))) and (cs_bill_customer_sk BETWEEN DynamicValue(RS_37_customer_c_customer_sk_min) AND DynamicValue(RS_37_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_37_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_9] (rows=287989836 width=466) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized BROADCAST [RS_162] Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 
[CUSTOM_SIMPLE_EDGE] vectorized + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_160] Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_158] (rows=36524 width=1119) + Select Operator [SEL_158] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_156] - <-Reducer 16 [BROADCAST_EDGE] vectorized + <-Reducer 17 [BROADCAST_EDGE] vectorized BROADCAST [RS_170] Group By Operator [GBY_169] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_168] Group By Operator [GBY_167] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_166] (rows=465450 width=385) + Select Operator [SEL_166] (rows=103433 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_164] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_178] - Group By Operator [GBY_177] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_176] - Group By Operator [GBY_175] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_174] (rows=462000 width=1436) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4890586)"] 
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4890586)"] + Select Operator [SEL_124] (rows=4959744 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_172] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_141] (rows=96800003 width=860) - Conds:RS_34._col1=RS_154._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] - PartitionCols:_col0 - Select Operator [SEL_153] (rows=1861800 width=385) - Output:["_col0"] - Filter Operator [FIL_152] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_140] (rows=88000001 width=860) - Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] - PartitionCols:_col2 - Select Operator [SEL_147] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col4"] - Filter Operator [FIL_146] (rows=80000000 width=860) - predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - PartitionCols:_col0 - Select Operator [SEL_150] (rows=40000000 width=1014) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_149] (rows=40000000 width=1014) - 
predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] + Please refer to the previous Merge Join Operator [MERGEJOIN_141] diff --git a/ql/src/test/results/clientpositive/perf/tez/query19.q.out b/ql/src/test/results/clientpositive/perf/tez/query19.q.out index c9b8739569..fd8578f402 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query19.q.out @@ -63,162 +63,138 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 8 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_164] - Limit [LIM_163] (rows=100 
width=88) + File Output Operator [FS_157] + Limit [LIM_156] (rows=100 width=419) Number of rows:100 - Select Operator [SEL_162] (rows=421657640 width=88) + Select Operator [SEL_155] (rows=2098703 width=418) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] - Select Operator [SEL_160] (rows=421657640 width=88) + SHUFFLE [RS_154] + Select Operator [SEL_153] (rows=2098703 width=418) Output:["_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_159] (rows=421657640 width=88) + Group By Operator [GBY_152] (rows=2098703 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_36] (rows=843315281 width=88) + Group By Operator [GBY_36] (rows=2098703 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col14, _col13, _col15, _col16 - Select Operator [SEL_35] (rows=843315281 width=88) + Select Operator [SEL_35] (rows=2098703 width=380) Output:["_col8","_col13","_col14","_col15","_col16"] - Filter Operator [FIL_34] (rows=843315281 width=88) + Filter Operator [FIL_34] (rows=2098703 width=380) predicate:(substr(_col3, 1, 5) <> substr(_col19, 1, 5)) - Merge Join Operator [MERGEJOIN_123] (rows=843315281 width=88) - Conds:RS_31._col7=RS_150._col0(Inner),Output:["_col3","_col8","_col13","_col14","_col15","_col16","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + Merge Join Operator [MERGEJOIN_123] (rows=2098703 width=380) + Conds:RS_31._col7=RS_151._col0(Inner),Output:["_col3","_col8","_col13","_col14","_col15","_col16","_col19"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_149] (rows=1704 width=1910) + Select Operator [SEL_150] (rows=1704 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_148] (rows=1704 
width=1910) + Filter Operator [FIL_149] (rows=1704 width=93) predicate:s_store_sk is not null - TableScan [TS_22] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_zip"] + TableScan [TS_22] (rows=1704 width=93) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_122] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_122] (rows=2098703 width=291) Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col13","_col14","_col15","_col16"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_28] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_119] (rows=88000001 width=860) - Conds:RS_126._col1=RS_129._col0(Inner),Output:["_col0","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - PartitionCols:_col1 - Select Operator [SEL_125] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] - PartitionCols:_col0 - Select Operator [SEL_128] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_127] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_121] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_121] (rows=2098703 width=202) Conds:RS_18._col1=RS_140._col0(Inner),Output:["_col2","_col3","_col4","_col9","_col10","_col11","_col12"] - <-Map 14 
[SIMPLE_EDGE] vectorized + <-Map 13 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_139] (rows=231000 width=1436) + Select Operator [SEL_139] (rows=7333 width=210) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_138] (rows=231000 width=1436) + Filter Operator [FIL_138] (rows=7333 width=210) predicate:((i_manager_id = 7) and i_item_sk is not null) - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"] - <-Reducer 10 [SIMPLE_EDGE] + TableScan [TS_12] (rows=462000 width=210) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_120] (rows=633595212 width=88) - Conds:RS_158._col0=RS_132._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] + Merge Join Operator [MERGEJOIN_120] (rows=13737330 width=4) + Conds:RS_148._col0=RS_132._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] PartitionCols:_col0 - Select Operator [SEL_131] (rows=18262 width=1119) + Select Operator [SEL_131] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_130] (rows=18262 width=1119) + Filter Operator [FIL_130] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] PartitionCols:_col0 - Select 
Operator [SEL_157] (rows=575995635 width=88) + Select Operator [SEL_147] (rows=501694138 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_156] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_28_customer_c_customer_sk_min) AND DynamicValue(RS_28_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_32_store_s_store_sk_min) AND DynamicValue(RS_32_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_32_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_146] (rows=501694138 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 
width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized BROADCAST [RS_137] Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_135] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] Group By Operator [GBY_134] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=18262 width=1119) + Select Operator [SEL_133] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_131] - <-Reducer 15 [BROADCAST_EDGE] vectorized + <-Reducer 14 [BROADCAST_EDGE] vectorized BROADCAST [RS_145] Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_143] Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=231000 width=1436) + Select Operator [SEL_141] (rows=7333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_139] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_155] - Group By Operator [GBY_154] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] - Group By Operator [GBY_152] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_151] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_149] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_147] - Group By Operator [GBY_146] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_96] - Group By Operator [GBY_95] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_94] (rows=88000001 width=860) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_119] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_119] (rows=80000000 width=93) + Conds:RS_126._col1=RS_129._col0(Inner),Output:["_col0","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col1 + Select Operator [SEL_125] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=80000000 width=8) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=40000000 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_127] (rows=40000000 width=93) + predicate:ca_address_sk is not null + TableScan [TS_3] (rows=40000000 width=93) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query2.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query2.q.out index f4ad16e2c2..f46b336f68 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query2.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query2.q.out @@ -127,161 +127,100 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 15 <- Reducer 13 (BROADCAST_EDGE), Union 16 (CONTAINS) -Map 17 <- Reducer 13 (BROADCAST_EDGE), Union 16 (CONTAINS) -Map 7 <- Reducer 9 (BROADCAST_EDGE), Union 2 (CONTAINS) -Reducer 10 <- Map 8 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (ONE_TO_ONE_EDGE) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Map 1 <- Union 2 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_189] - Select Operator [SEL_188] (rows=574982057 width=135) + Reducer 7 vectorized + File Output Operator [FS_173] + Select Operator [SEL_172] (rows=12881 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_55] - Select Operator [SEL_54] (rows=574982057 width=135) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_57] + Select Operator [SEL_56] (rows=12881 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - 
Merge Join Operator [MERGEJOIN_141] (rows=574982057 width=135) - Conds:RS_179._col0=RS_186._col0(Inner),RS_179._col0=RS_52.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + Merge Join Operator [MERGEJOIN_146] (rows=12881 width=1572) + Conds:RS_53._col0=RS_54.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_53] PartitionCols:_col0 - Select Operator [SEL_184] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_182] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_week_seq is not null) - TableScan [TS_20] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_week_seq","d_year"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:(_col0 - 53) - Merge Join Operator [MERGEJOIN_140] (rows=261355475 width=135) - Conds:RS_181._col0=RS_187._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + Merge Join Operator [MERGEJOIN_143] (rows=652 width=788) + Conds:RS_164._col0=RS_170._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] PartitionCols:_col0 - Select Operator [SEL_185] (rows=36524 width=1119) + Select Operator [SEL_168] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_183] (rows=36524 width=1119) - predicate:((d_year = 2002) and d_week_seq is not null) - Please refer to the previous TableScan [TS_20] - <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_181] + Filter Operator [FIL_166] (rows=652 width=8) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_20] (rows=73049 width=8) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_164] PartitionCols:_col0 - Group By Operator [GBY_180] (rows=237595882 width=135) + Group By Operator [GBY_163] (rows=13152 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_39] (rows=475191764 width=135) + Group By Operator [GBY_16] (rows=4576896 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 - Select Operator [SEL_37] (rows=475191764 width=135) + Select Operator [SEL_14] (rows=430516591 width=206) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_139] (rows=475191764 width=135) - Conds:Union 16._col0=RS_166._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_166] + Merge Join Operator [MERGEJOIN_142] (rows=430516591 width=206) + Conds:Union 2._col0=RS_162._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_163] (rows=73049 width=1119) + Select Operator [SEL_161] (rows=73049 width=99) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_162] (rows=73049 width=1119) + Filter Operator [FIL_160] (rows=73049 width=99) predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_8] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Union 16 [SIMPLE_EDGE] - 
<-Map 15 [CONTAINS] vectorized - Reduce Output Operator [RS_198] + TableScan [TS_8] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + Reduce Output Operator [RS_159] PartitionCols:_col0 - Select Operator [SEL_197] (rows=144002668 width=135) + Select Operator [SEL_158] (rows=143966864 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_152] (rows=144002668 width=135) + Filter Operator [FIL_157] (rows=143966864 width=115) + predicate:ws_sold_date_sk is not null + TableScan [TS_147] (rows=144002668 width=115) Output:["ws_sold_date_sk","ws_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_194] - Group By Operator [GBY_193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_171] - Group By Operator [GBY_169] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_167] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_163] - <-Map 17 [CONTAINS] vectorized - Reduce Output Operator [RS_201] + <-Map 9 [CONTAINS] vectorized + Reduce Output Operator [RS_176] PartitionCols:_col0 - Select Operator [SEL_200] (rows=287989836 width=135) + Select Operator [SEL_175] (rows=286549727 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_199] (rows=287989836 width=135) - 
predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_157] (rows=287989836 width=135) + Filter Operator [FIL_174] (rows=286549727 width=115) + predicate:cs_sold_date_sk is not null + TableScan [TS_152] (rows=287989836 width=115) Output:["cs_sold_date_sk","cs_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_195] - Please refer to the previous Group By Operator [GBY_193] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_179] - PartitionCols:_col0 - Group By Operator [GBY_178] (rows=237595882 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:(_col0 - 53) + Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) + Conds:RS_165._col0=RS_171._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_171] + PartitionCols:_col0 + Select Operator [SEL_169] (rows=652 width=8) + Output:["_col0"] + Filter Operator [FIL_167] (rows=652 width=8) + predicate:((d_year = 2002) and d_week_seq is not null) + Please refer to the previous TableScan [TS_20] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_165] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=475191764 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 - Select Operator [SEL_14] (rows=475191764 width=135) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_138] (rows=475191764 width=135) - Conds:Union 2._col0=RS_164._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_164] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_163] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_177] - PartitionCols:_col0 - Select Operator [SEL_176] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_175] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_142] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_173] - Group By Operator [GBY_172] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_170] - Group By Operator [GBY_168] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_165] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_163] - <-Map 7 [CONTAINS] vectorized - Reduce Output Operator [RS_192] - PartitionCols:_col0 - Select Operator [SEL_191] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_190] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_147] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_174] - Please refer to the previous Group By Operator [GBY_172] + Please refer to the previous Group By Operator [GBY_163] diff --git a/ql/src/test/results/clientpositive/perf/tez/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/query20.q.out index d915f3a1fd..892beb37cb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query20.q.out @@ -80,62 +80,62 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_86] - Limit [LIM_85] (rows=100 width=135) + Limit [LIM_85] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_84] (rows=174233858 width=135) + Select Operator [SEL_84] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_83] - Select Operator [SEL_82] (rows=174233858 width=135) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_81] (rows=174233858 width=135) + PTF Operator [PTF_81] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_80] (rows=174233858 width=135) + Select Operator [SEL_80] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_79] PartitionCols:_col1 - Group By Operator [GBY_78] (rows=174233858 width=135) + Group By Operator [GBY_78] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] 
PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=348467716 width=135) + Group By Operator [GBY_16] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_58] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_58] (rows=9551005 width=673) Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_69] PartitionCols:_col0 - Select Operator [SEL_68] (rows=462000 width=1436) + Select Operator [SEL_68] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_67] (rows=462000 width=1436) + Filter Operator [FIL_67] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_57] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_57] (rows=31836679 width=110) Conds:RS_77._col0=RS_61._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_60] (rows=8116 width=1119) + Select Operator [SEL_60] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_59] (rows=8116 width=1119) + Filter Operator [FIL_59] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 
width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_77] PartitionCols:_col0 - Select Operator [SEL_76] (rows=287989836 width=135) + Select Operator [SEL_76] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_75] (rows=287989836 width=135) + Filter Operator [FIL_75] (rows=286549727 width=119) predicate:((cs_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_74] Group By Operator [GBY_73] (rows=1 width=12) @@ -144,7 +144,7 @@ Stage-0 SHUFFLE [RS_72] Group By Operator [GBY_71] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_70] (rows=462000 width=1436) + Select Operator [SEL_70] (rows=138600 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_68] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -155,7 +155,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_64] Group By Operator [GBY_63] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_62] (rows=8116 width=1119) + Select Operator [SEL_62] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_60] diff --git a/ql/src/test/results/clientpositive/perf/tez/query21.q.out b/ql/src/test/results/clientpositive/perf/tez/query21.q.out index 05a4627369..be040df41a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query21.q.out @@ -81,52 +81,52 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_95] - Limit [LIM_94] (rows=100 width=15) + Limit [LIM_94] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_93] (rows=12506076 width=15) + Select Operator [SEL_93] (rows=231983 width=216) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_92] - Filter Operator [FIL_91] (rows=12506076 width=15) + Filter Operator [FIL_91] (rows=231983 width=216) predicate:CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END - Group By Operator [GBY_90] (rows=25012152 width=15) + Group By Operator [GBY_90] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1 - Group By Operator [GBY_23] (rows=50024305 width=15) + Group By Operator [GBY_23] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Select Operator [SEL_21] (rows=50024305 width=15) + Select Operator [SEL_21] (rows=463966 width=294) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_77] (rows=50024305 width=15) + Merge Join Operator [MERGEJOIN_77] (rows=463966 width=294) 
Conds:RS_18._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col7","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_88] (rows=27 width=1029) + Select Operator [SEL_88] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_87] (rows=27 width=1029) + Filter Operator [FIL_87] (rows=27 width=104) predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + TableScan [TS_9] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_76] (rows=45476640 width=15) + Merge Join Operator [MERGEJOIN_76] (rows=463966 width=198) Conds:RS_15._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_85] (rows=51333 width=1436) + Select Operator [SEL_85] (rows=51333 width=215) Output:["_col0","_col1"] - Filter Operator [FIL_84] (rows=51333 width=1436) + Filter Operator [FIL_84] (rows=51333 width=215) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_current_price"] + TableScan [TS_6] (rows=462000 width=215) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_75] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_75] (rows=4175715 width=104) Conds:RS_80._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_80] @@ -136,14 +136,14 @@ Stage-0 Filter Operator [FIL_78] (rows=37584000 width=15) 
predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_83] PartitionCols:_col0 - Select Operator [SEL_82] (rows=8116 width=1119) + Select Operator [SEL_82] (rows=8116 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_81] (rows=8116 width=1119) + Filter Operator [FIL_81] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out index 81f9b8d381..df6889b5ff 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -65,50 +65,50 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_93] - Limit [LIM_92] (rows=100 width=15) + Limit [LIM_92] (rows=100 width=397) Number of rows:100 - Select Operator [SEL_91] (rows=125060762 width=15) + Select Operator [SEL_91] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_90] - Select Operator [SEL_89] (rows=125060762 width=15) + Select Operator [SEL_89] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_88] (rows=125060762 width=15) + Group By Operator 
[GBY_88] (rows=32730675 width=413) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_22] (rows=250121525 width=15) + Group By Operator [GBY_22] (rows=32730675 width=413) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)"],keys:_col8, _col9, _col10, _col11, 0L - Merge Join Operator [MERGEJOIN_75] (rows=50024305 width=15) + Merge Join Operator [MERGEJOIN_75] (rows=6546135 width=391) Conds:RS_18._col1=RS_87._col0(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=462000 width=1436) + Select Operator [SEL_86] (rows=462000 width=393) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_85] (rows=462000 width=1436) + Filter Operator [FIL_85] (rows=462000 width=393) predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + TableScan [TS_9] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_74] (rows=45476640 width=15) + Merge Join Operator [MERGEJOIN_74] (rows=6546135 width=6) Conds:RS_15._col2=RS_84._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_84] PartitionCols:_col0 - Select Operator [SEL_83] (rows=27 width=1029) + Select Operator [SEL_83] (rows=27 width=4) Output:["_col0"] - Filter Operator [FIL_82] (rows=27 width=1029) + Filter Operator [FIL_82] (rows=27 width=4) predicate:w_warehouse_sk is not null - 
TableScan [TS_6] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk"] + TableScan [TS_6] (rows=27 width=4) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_73] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_73] (rows=6546135 width=10) Conds:RS_78._col0=RS_81._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_78] @@ -118,14 +118,14 @@ Stage-0 Filter Operator [FIL_76] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_81] PartitionCols:_col0 - Select Operator [SEL_80] (rows=73049 width=1119) + Select Operator [SEL_80] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_79] (rows=73049 width=1119) + Filter Operator [FIL_79] (rows=317 width=8) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 3e4dffbaaf..61d1dd7338 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,5 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[585][tables = 
[$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product -Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 36' is a cross product +Warning: Shuffle Join MERGEJOIN[589][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[590][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[592][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -119,443 +121,444 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 13 <- Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) -Map 23 <- Reducer 31 (BROADCAST_EDGE) -Map 43 <- Reducer 12 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) -Map 44 <- Reducer 38 (BROADCAST_EDGE) -Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) -Reducer 25 <- Map 42 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Reducer 27 
(CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Reducer 26 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 30 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) -Reducer 33 <- Map 42 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 30 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) -Reducer 41 <- Reducer 40 (SIMPLE_EDGE) +Map 1 <- Reducer 19 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 15 <- Reducer 22 (BROADCAST_EDGE) +Map 24 <- Reducer 37 (BROADCAST_EDGE) +Map 38 <- Reducer 7 (BROADCAST_EDGE) +Map 44 <- Reducer 14 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 45 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 44 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 17 <- Map 23 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 26 <- Map 41 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- 
Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 40 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 43 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 36 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 40 <- Reducer 39 (SIMPLE_EDGE) +Reducer 42 <- Map 41 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 43 <- Reducer 42 (SIMPLE_EDGE) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 43 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_691] - Limit [LIM_690] (rows=1 width=112) + File Output Operator [FS_695] + Limit [LIM_694] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_689] (rows=1 width=112) + Group By Operator [GBY_693] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] - Reduce Output Operator [RS_598] - Group By Operator [GBY_597] (rows=1 width=112) + <-Reducer 12 [CONTAINS] + Reduce Output Operator [RS_604] + Group By Operator [GBY_603] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_595] (rows=191667562 width=135) + Select Operator [SEL_601] (rows=1 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_594] (rows=191667562 width=135) - Conds:RS_244._col2=RS_245._col0(Inner),Output:["_col3","_col4"] - <-Reducer 10 [SIMPLE_EDGE] 
- SHUFFLE [RS_244] + Merge Join Operator [MERGEJOIN_600] (rows=1 width=116) + Conds:RS_248._col2=RS_249._col0(Inner),Output:["_col3","_col4"] + <-Reducer 11 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_248] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_584] (rows=174243235 width=135) - Conds:RS_241._col1=RS_640._col0(Inner),Output:["_col2","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_640] + Merge Join Operator [MERGEJOIN_588] (rows=155 width=0) + Conds:RS_245._col1=RS_638._col0(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_638] PartitionCols:_col0 - Group By Operator [GBY_637] (rows=58079562 width=88) + Group By Operator [GBY_635] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_636] (rows=116159124 width=88) + Select Operator [SEL_634] (rows=6548799 width=12) Output:["_col1"] - Filter Operator [FIL_635] (rows=116159124 width=88) + Filter Operator [FIL_633] (rows=6548799 width=12) predicate:(_col3 > 4L) - Select Operator [SEL_634] (rows=348477374 width=88) + Select Operator [SEL_632] (rows=19646398 width=12) Output:["_col0","_col3"] - Group By Operator [GBY_633] (rows=348477374 width=88) + Group By Operator [GBY_631] (rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0 - Group By Operator [GBY_23] (rows=696954748 width=88) + Group By Operator [GBY_23] (rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_21] (rows=696954748 width=88) + Select Operator [SEL_21] (rows=19646398 width=282) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_569] (rows=696954748 width=88) - Conds:RS_18._col1=RS_621._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_621] + Merge Join Operator [MERGEJOIN_573] (rows=19646398 width=282) + Conds:RS_18._col1=RS_630._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_630] PartitionCols:_col0 - Select Operator [SEL_620] (rows=462000 width=1436) + Select Operator [SEL_629] (rows=462000 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_619] (rows=462000 width=1436) + Filter Operator [FIL_628] (rows=462000 width=188) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 14 [SIMPLE_EDGE] + TableScan [TS_12] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_568] (rows=633595212 width=88) - Conds:RS_632._col0=RS_613._col0(Inner),Output:["_col1","_col3"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_613] + Merge Join Operator [MERGEJOIN_572] (rows=19646398 width=98) + Conds:RS_627._col0=RS_619._col0(Inner),Output:["_col1","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_619] PartitionCols:_col0 - Select Operator [SEL_612] (rows=73049 width=1119) + Select Operator [SEL_618] (rows=2609 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_611] (rows=73049 width=1119) + Filter Operator [FIL_617] (rows=2609 width=102) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_632] + TableScan [TS_9] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_627] PartitionCols:_col0 - Select Operator [SEL_631] 
(rows=575995635 width=88) + Select Operator [SEL_626] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_630] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_618] - Group By Operator [GBY_617] (rows=1 width=12) + Filter Operator [FIL_625] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_6] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_624] + Group By Operator [GBY_623] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_616] - Group By Operator [GBY_615] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_622] + Group By Operator [GBY_621] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_614] (rows=73049 width=1119) + Select Operator [SEL_620] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_612] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_629] - Group By Operator [GBY_628] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_626] - Group By Operator [GBY_624] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_622] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_620] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_241] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_245] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_575] (rows=158402938 width=135) - Conds:RS_698._col0=RS_603._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_603] + Merge Join Operator [MERGEJOIN_579] (rows=3941102 width=122) + Conds:RS_702._col0=RS_609._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_609] PartitionCols:_col0 - Select Operator [SEL_600] (rows=18262 width=1119) + Select Operator [SEL_606] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_599] (rows=18262 width=1119) + Filter Operator [FIL_605] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_698] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + 
<-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_702] PartitionCols:_col0 - Select Operator [SEL_697] (rows=144002668 width=135) + Select Operator [SEL_701] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_696] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_143_item_i_item_sk_min) AND DynamicValue(RS_143_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_143_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_239_date_dim_d_date_sk_min) AND DynamicValue(RS_239_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_239_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_124] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_693] - Group By Operator [GBY_692] (rows=1 width=12) + Filter Operator [FIL_700] (rows=143930993 width=127) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_246_item_i_item_sk_min) AND DynamicValue(RS_246_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_246_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_243_date_dim_d_date_sk_min) AND DynamicValue(RS_243_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_243_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_126] (rows=144002668 width=127) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_697] + Group By Operator [GBY_696] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_608] - Group By Operator [GBY_606] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_614] + Group By Operator [GBY_612] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_604] (rows=18262 width=1119) + Select Operator [SEL_610] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_600] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_695] - Group By Operator [GBY_694] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_606] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_699] + Group By Operator [GBY_698] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_627] - Group By Operator [GBY_625] (rows=1 width=12) + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_643] + Group By Operator [GBY_641] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_623] (rows=462000 width=1436) + Select Operator [SEL_639] (rows=2235 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_620] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_245] + Please refer to the previous Group By Operator [GBY_635] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_249] PartitionCols:_col0 - Select Operator [SEL_237] (rows=105599202 width=433) + Select Operator [SEL_241] (rows=471875 width=4) Output:["_col0"] - Filter Operator [FIL_236] (rows=105599202 width=433) + Filter Operator [FIL_240] 
(rows=471875 width=228) predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_587] (rows=316797606 width=433) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_688] - Group By Operator [GBY_686] (rows=316797606 width=88) + Merge Join Operator [MERGEJOIN_593] (rows=1415625 width=228) + Conds:(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_237] + Merge Join Operator [MERGEJOIN_592] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_708] + Select Operator [SEL_707] (rows=1 width=8) + Filter Operator [FIL_706] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_705] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_704] (rows=1 width=8) + Group By Operator [GBY_703] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_676] + Group By Operator [GBY_672] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_668] (rows=11859 width=116) + Output:["_col0"] + Group By Operator [GBY_665] (rows=11859 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Group By Operator [GBY_50] (rows=11859 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_48] (rows=18762463 width=4) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_575] (rows=18762463 width=4) + Conds:RS_45._col1=RS_663._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_663] + PartitionCols:_col0 + Select Operator [SEL_661] (rows=80000000 width=4) + Output:["_col0"] + Filter Operator [FIL_660] 
(rows=80000000 width=4) + predicate:c_customer_sk is not null + TableScan [TS_96] (rows=80000000 width=4) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_574] (rows=18762463 width=0) + Conds:RS_659._col0=RS_651._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 36 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_651] + PartitionCols:_col0 + Select Operator [SEL_650] (rows=2609 width=8) + Output:["_col0"] + Filter Operator [FIL_649] (rows=2609 width=8) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_36] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_659] + PartitionCols:_col0 + Select Operator [SEL_658] (rows=525327388 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_657] (rows=525327388 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_33] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_656] + Group By Operator [GBY_655] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 36 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_654] + Group By Operator [GBY_653] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + 
Select Operator [SEL_652] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_650] + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_710] + Group By Operator [GBY_709] (rows=1 width=112) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_677] + Group By Operator [GBY_673] (rows=1 width=112) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_669] (rows=11859 width=116) + Output:["_col1"] + Please refer to the previous Group By Operator [GBY_665] + <-Reducer 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_717] + Group By Operator [GBY_716] (rows=1415625 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 40 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_231] PartitionCols:_col0 - Group By Operator [GBY_104] (rows=633595212 width=88) + Group By Operator [GBY_230] (rows=550080312 width=116) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_102] (rows=633595212 width=88) + Select Operator [SEL_228] (rows=550080312 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_574] (rows=633595212 width=88) - Conds:RS_685._col0=RS_665._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_665] + Merge Join Operator [MERGEJOIN_586] (rows=550080312 width=114) + Conds:RS_715._col0=RS_664._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_664] PartitionCols:_col0 - Select Operator [SEL_664] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_663] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_96] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_685] + 
Please refer to the previous Select Operator [SEL_661] + <-Map 45 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_715] PartitionCols:_col0 - Select Operator [SEL_684] (rows=575995635 width=88) + Select Operator [SEL_714] (rows=550080312 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_683] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - TableScan [TS_93] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_716] - Select Operator [SEL_715] (rows=1 width=120) - Filter Operator [FIL_714] (rows=1 width=120) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_713] (rows=1 width=120) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_712] (rows=1 width=120) - Group By Operator [GBY_711] (rows=1 width=120) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_709] - Group By Operator [GBY_707] (rows=1 width=120) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_705] (rows=348477374 width=88) - Output:["_col0"] - Group By Operator [GBY_704] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_175] - PartitionCols:_col0 - Group By Operator [GBY_174] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_172] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_579] (rows=696954748 width=88) - Conds:RS_169._col1=RS_667._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_667] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_664] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_169] - PartitionCols:_col1 - Merge Join 
Operator [MERGEJOIN_578] (rows=633595212 width=88) - Conds:RS_703._col0=RS_652._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 30 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_652] - PartitionCols:_col0 - Select Operator [SEL_649] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_648] (rows=73049 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_36] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_703] - PartitionCols:_col0 - Select Operator [SEL_702] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_701] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_167_date_dim_d_date_sk_min) AND DynamicValue(RS_167_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_167_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_157] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_700] - Group By Operator [GBY_699] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_657] - Group By Operator [GBY_655] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_653] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_649] - <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_718] - Group By Operator [GBY_717] (rows=1 width=224) - 
Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_710] - Group By Operator [GBY_708] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_706] (rows=348477374 width=88) - Output:["_col1"] - Please refer to the previous Group By Operator [GBY_704] + Filter Operator [FIL_713] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_248_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_248_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_248_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_219] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_712] + Group By Operator [GBY_711] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_562] + Group By Operator [GBY_561] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_560] (rows=155 width=0) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_588] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_593] - Group By Operator [GBY_592] (rows=1 width=112) + Reduce Output Operator [RS_599] + Group By Operator [GBY_598] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_590] (rows=383314495 width=135) + Select Operator [SEL_596] (rows=1 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_589] (rows=383314495 width=135) - Conds:RS_120._col1=RS_121._col0(Inner),Output:["_col3","_col4"] - <-Reducer 28 
[SIMPLE_EDGE] - SHUFFLE [RS_121] - PartitionCols:_col0 - Select Operator [SEL_113] (rows=105599202 width=433) - Output:["_col0"] - Filter Operator [FIL_112] (rows=105599202 width=433) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_585] (rows=316797606 width=433) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_687] - Please refer to the previous Group By Operator [GBY_686] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_680] - Select Operator [SEL_679] (rows=1 width=120) - Filter Operator [FIL_678] (rows=1 width=120) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_677] (rows=1 width=120) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_676] (rows=1 width=120) - Group By Operator [GBY_675] (rows=1 width=120) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_673] - Group By Operator [GBY_671] (rows=1 width=120) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_669] (rows=348477374 width=88) - Output:["_col0"] - Group By Operator [GBY_668] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_48] (rows=696954748 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_571] (rows=696954748 width=88) - Conds:RS_45._col1=RS_666._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_666] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_664] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_570] (rows=633595212 
width=88) - Conds:RS_662._col0=RS_650._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 30 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_650] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_649] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_662] - PartitionCols:_col0 - Select Operator [SEL_661] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_660] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_33] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_659] - Group By Operator [GBY_658] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_656] - Group By Operator [GBY_654] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_651] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_649] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_682] - Group By Operator [GBY_681] (rows=1 width=224) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_674] - Group By Operator [GBY_672] (rows=1 width=224) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_670] (rows=348477374 width=88) - Output:["_col1"] - Please refer 
to the previous Group By Operator [GBY_668] + Merge Join Operator [MERGEJOIN_595] (rows=1 width=116) + Conds:RS_122._col1=RS_123._col0(Inner),Output:["_col3","_col4"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_120] + PARTITION_ONLY_SHUFFLE [RS_122] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_583] (rows=348467716 width=135) - Conds:RS_117._col2=RS_638._col0(Inner),Output:["_col1","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_638] + Merge Join Operator [MERGEJOIN_587] (rows=304 width=0) + Conds:RS_119._col2=RS_636._col0(Inner),Output:["_col1","_col3","_col4"] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_636] PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_637] + Please refer to the previous Group By Operator [GBY_635] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_117] + SHUFFLE [RS_119] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_567] (rows=316788826 width=135) - Conds:RS_647._col0=RS_601._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_601] + Merge Join Operator [MERGEJOIN_571] (rows=7751875 width=101) + Conds:RS_648._col0=RS_607._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_607] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_600] + Please refer to the previous Select Operator [SEL_606] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_647] + SHUFFLE [RS_648] PartitionCols:_col0 - Select Operator [SEL_646] (rows=287989836 width=135) + Select Operator [SEL_647] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_645] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_118_item_i_item_sk_min) AND DynamicValue(RS_118_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_118_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN 
DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_644] - Group By Operator [GBY_643] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=58079560)"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + Filter Operator [FIL_646] (rows=285117831 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_120_item_i_item_sk_min) AND DynamicValue(RS_120_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_120_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_117_date_dim_d_date_sk_min) AND DynamicValue(RS_117_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_117_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_645] + Group By Operator [GBY_644] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_642] - Group By Operator [GBY_641] (rows=1 width=20) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=58079560)"] - Select Operator [SEL_639] (rows=58079562 width=88) + Group By Operator [GBY_640] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_637] (rows=2235 width=4) Output:["_col0"] - Please refer to the previous Group By Operator [GBY_637] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_610] - Group By Operator [GBY_609] (rows=1 width=12) + Please refer to the previous Group By Operator [GBY_635] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_616] + Group By Operator [GBY_615] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_607] - Group By Operator [GBY_605] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_613] + Group By Operator [GBY_611] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_602] (rows=18262 width=1119) + Select Operator [SEL_608] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_600] + Please refer to the previous Select Operator [SEL_606] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_115] (rows=471875 width=4) + Output:["_col0"] + Filter Operator [FIL_114] (rows=471875 width=228) + predicate:(_col3 > (0.95 * _col1)) + Merge Join Operator [MERGEJOIN_590] (rows=1415625 width=228) + Conds:(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_111] + Merge Join Operator [MERGEJOIN_589] (rows=1 width=112) + Conds:(Inner),Output:["_col1"] + <-Reducer 28 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_683] + Select Operator [SEL_682] (rows=1 width=8) + Filter Operator [FIL_681] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_680] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_679] (rows=1 width=8) + Group By Operator [GBY_678] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_674] + Group By Operator [GBY_670] (rows=1 width=8) + Output:["_col0"],aggregations:["count(_col0)"] + Select Operator [SEL_666] (rows=11859 width=116) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_665] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_685] + Group By Operator [GBY_684] (rows=1 width=112) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_675] + Group By Operator [GBY_671] (rows=1 width=112) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_667] (rows=11859 width=116) + Output:["_col1"] + Please refer to the previous Group By Operator [GBY_665] + <-Reducer 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_692] + Group By Operator [GBY_691] (rows=1415625 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col0 + Group By Operator [GBY_104] (rows=550080312 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_102] (rows=550080312 width=114) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_578] (rows=550080312 width=114) + Conds:RS_690._col0=RS_662._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_662] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_661] + <-Map 
38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_690] + PartitionCols:_col0 + Select Operator [SEL_689] (rows=550080312 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_688] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_122_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_122_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_122_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_93] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_687] + Group By Operator [GBY_686] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_458] + Group By Operator [GBY_457] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_456] (rows=304 width=0) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_587] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 5db3c31ea6..902358a524 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -114,30 +114,23 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) -Map 31 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 20 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 30 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 25 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 30 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 
(SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) @@ -147,285 +140,208 @@ Stage-0 Stage-1 Reducer 8 File Output Operator [FS_91] - Select Operator [SEL_90] (rows=77303902 width=321) + Select Operator [SEL_90] (rows=78393744 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=77303902 width=321) + Filter Operator [FIL_89] (rows=78393744 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_290] (rows=231911707 width=321) + Merge Join Operator [MERGEJOIN_290] (rows=235181232 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_380] - Select Operator [SEL_379] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_345] + Select Operator [SEL_344] (rows=1 width=112) Output:["_col0"] - Group By Operator [GBY_378] (rows=1 width=232) + Group By Operator [GBY_343] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_377] - Group By Operator [GBY_376] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_341] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_375] (rows=463823414 width=88) + Select Operator [SEL_340] (rows=2121289008973 width=932) Output:["_col10"] - Group By Operator [GBY_374] (rows=463823414 width=88) + Group By Operator [GBY_339] (rows=2121289008973 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, 
KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_77] (rows=927646829 width=88) + Group By Operator [GBY_77] (rows=2121289008973 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col11, _col12, _col6, _col8, _col15, _col16, _col17, _col18, _col19, _col22 - Merge Join Operator [MERGEJOIN_289] (rows=927646829 width=88) - Conds:RS_73._col9, _col13=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] + Merge Join Operator [MERGEJOIN_289] (rows=2121289008973 width=932) + Conds:RS_73._col9, _col13=RS_328._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:_col1, upper(_col2) - Select Operator [SEL_353] (rows=40000000 width=1014) + Select Operator [SEL_326] (rows=40000000 width=272) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_352] (rows=40000000 width=1014) + Filter Operator [FIL_325] (rows=40000000 width=272) predicate:(ca_zip is not null and upper(ca_country) is not null) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] + TableScan [TS_15] (rows=40000000 width=272) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_state","ca_zip","ca_country"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_73] PartitionCols:_col9, _col13 - Merge Join Operator [MERGEJOIN_288] (rows=843315281 width=88) - Conds:RS_70._col0, _col3=RS_334._col0, 
_col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + Merge Join Operator [MERGEJOIN_288] (rows=537799796 width=1023) + Conds:RS_70._col0, _col3=RS_324._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0, _col1 - Select Operator [SEL_330] (rows=57591150 width=77) + Select Operator [SEL_322] (rows=57591150 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=57591150 width=77) + Filter Operator [FIL_321] (rows=57591150 width=8) predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + TableScan [TS_12] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_287] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_287] (rows=385681992 width=1029) Conds:RS_67._col0=RS_297._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_294] (rows=462000 width=1436) + Select Operator [SEL_294] (rows=462000 width=384) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_292] (rows=462000 width=1436) + Filter Operator [FIL_292] (rows=462000 width=384) predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] 
- <-Reducer 20 [SIMPLE_EDGE] + TableScan [TS_3] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_286] (rows=696954748 width=88) - Conds:RS_64._col1=RS_321._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Merge Join Operator [MERGEJOIN_286] (rows=385681992 width=648) + Conds:RS_64._col1=RS_320._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_318] (rows=80000000 width=860) + Select Operator [SEL_318] (rows=80000000 width=276) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_317] (rows=80000000 width=860) + Filter Operator [FIL_317] (rows=80000000 width=276) predicate:(c_birth_country is not null and c_customer_sk is not null) - TableScan [TS_9] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 19 [SIMPLE_EDGE] + TableScan [TS_9] (rows=80000000 width=276) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_285] (rows=633595212 width=88) - Conds:RS_373._col2=RS_309._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] + Merge Join Operator [MERGEJOIN_285] (rows=385681992 width=379) + Conds:RS_338._col2=RS_306._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] 
PartitionCols:_col0 - Select Operator [SEL_306] (rows=852 width=1910) + Select Operator [SEL_303] (rows=155 width=271) Output:["_col0","_col1","_col3","_col4"] - Filter Operator [FIL_305] (rows=852 width=1910) + Filter Operator [FIL_302] (rows=155 width=271) predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_373] + TableScan [TS_6] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] PartitionCols:_col2 - Select Operator [SEL_372] (rows=575995635 width=88) + Select Operator [SEL_337] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_371] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_65_customer_c_customer_sk_min) AND DynamicValue(RS_65_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_65_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_68_item_i_item_sk_min) AND DynamicValue(RS_68_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_68_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_store_returns_sr_item_sk_min) AND DynamicValue(RS_71_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_71_store_returns_sr_ticket_number_min) AND DynamicValue(RS_71_store_returns_sr_ticket_number_max) and 
in_bloom_filter(ss_ticket_number, DynamicValue(RS_71_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_43] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_294] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) + Filter Operator [FIL_336] (rows=525333486 width=122) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_43] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_335] + Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_314] - Group By Operator [GBY_312] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + Group By Operator [GBY_309] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_310] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_306] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_364] - Group By Operator [GBY_363] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] - Group By Operator [GBY_324] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_322] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] - Group By Operator [GBY_339] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_335] (rows=57591150 width=77) + Select Operator [SEL_307] (rows=155 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_370] - Group By Operator [GBY_369] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 
[CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_336] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_303] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_360] - Select Operator [SEL_359] (rows=231911707 width=88) + PARTITION_ONLY_SHUFFLE [RS_333] + Select Operator [SEL_332] (rows=235181232 width=380) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_358] (rows=231911707 width=88) + Group By Operator [GBY_331] (rows=235181232 width=380) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col2, _col7 - Select Operator [SEL_357] (rows=463823414 width=88) + Select Operator [SEL_330] (rows=365777643230 width=843) Output:["_col1","_col2","_col7","_col9"] - Group By Operator [GBY_356] (rows=463823414 width=88) + Group By Operator [GBY_329] (rows=365777643230 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_34] (rows=927646829 width=88) + Group By Operator [GBY_34] (rows=365777643230 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col17, _col18, _col12, _col22, _col6, _col7, _col9, _col10, _col14 - Merge Join Operator [MERGEJOIN_284] (rows=927646829 width=88) - Conds:RS_30._col15, _col19=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] - <-Map 30 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_354] + Merge Join Operator [MERGEJOIN_284] (rows=365777643230 width=843) + Conds:RS_30._col15, _col19=RS_327._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col1, upper(_col2) - Please refer to the previous Select Operator [SEL_353] + Please refer to the previous Select Operator [SEL_326] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col15, _col19 - Merge Join Operator [MERGEJOIN_283] (rows=843315281 width=88) - Conds:RS_27._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + Merge Join Operator [MERGEJOIN_283] (rows=92733777 width=910) + Conds:RS_27._col0, _col3=RS_323._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_322] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_282] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_282] (rows=56246341 width=899) Conds:RS_24._col1=RS_319._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 22 [SIMPLE_EDGE] vectorized + <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_319] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_318] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_281] (rows=696954748 width=88) - 
Conds:RS_21._col2=RS_307._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Merge Join Operator [MERGEJOIN_281] (rows=56246341 width=630) + Conds:RS_21._col2=RS_304._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_306] + Please refer to the previous Select Operator [SEL_303] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_280] (rows=633595212 width=88) - Conds:RS_351._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_280] (rows=76612563 width=382) + Conds:RS_316._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_293] (rows=231000 width=1436) + Select Operator [SEL_293] (rows=7000 width=385) Output:["_col0","_col1","_col2","_col4","_col5"] - Filter Operator [FIL_291] (rows=231000 width=1436) + Filter Operator [FIL_291] (rows=7000 width=384) predicate:((i_color = 'orchid') and i_item_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_351] + SHUFFLE [RS_316] PartitionCols:_col0 - Select Operator [SEL_350] (rows=575995635 width=88) + Select Operator [SEL_315] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_349] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_customer_c_customer_sk_min) AND DynamicValue(RS_25_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_customer_c_customer_sk_bloom_filter))) and 
(ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_28_store_returns_sr_ticket_number_min) AND DynamicValue(RS_28_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_28_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + Filter Operator [FIL_314] (rows=525333486 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_304] - Group By 
Operator [GBY_303] (rows=1 width=12) + BROADCAST [RS_301] + Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Group By Operator [GBY_299] (rows=1 width=12) + SHUFFLE [RS_299] + Group By Operator [GBY_298] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=231000 width=1436) + Select Operator [SEL_296] (rows=7000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_293] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_316] - Group By Operator [GBY_315] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_313] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] - Group By Operator [GBY_311] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] + Group By Operator [GBY_308] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_308] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_306] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_328] - Group By Operator [GBY_327] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] - Group By Operator [GBY_323] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_320] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_332] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_348] - Group By Operator [GBY_347] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] - Group By Operator [GBY_338] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_333] (rows=57591150 width=77) + Select Operator [SEL_305] (rows=155 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_303] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index dc15890b24..5d1c9fc1df 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -109,20 +109,20 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 16 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) -Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 17 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 18 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -132,188 +132,185 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_270] - Limit [LIM_269] (rows=100 width=88) + File Output 
Operator [FS_263] + Limit [LIM_262] (rows=100 width=808) Number of rows:100 - Select Operator [SEL_268] (rows=421657640 width=88) + Select Operator [SEL_261] (rows=4248052806 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=421657640 width=88) + SHUFFLE [RS_260] + Group By Operator [GBY_259] (rows=4248052806 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=843315281 width=88) + Group By Operator [GBY_48] (rows=4248052806 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col20)","sum(_col12)"],keys:_col25, _col26, _col28, _col29 - Top N Key Operator [TNK_95] (rows=843315281 width=88) + Top N Key Operator [TNK_95] (rows=4248052806 width=807) keys:_col25, _col26, _col28, _col29,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_214] (rows=843315281 width=88) - Conds:RS_44._col3=RS_252._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] + Merge Join Operator [MERGEJOIN_214] (rows=4248052806 width=807) + Conds:RS_44._col3=RS_258._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] PartitionCols:_col0 - Select Operator [SEL_251] (rows=1704 width=1910) + Select Operator [SEL_257] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_250] (rows=1704 width=1910) + Filter Operator [FIL_256] (rows=1704 width=192) predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=1910) - 
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + TableScan [TS_32] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_213] (rows=766650239 width=88) - Conds:RS_41._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] + Merge Join Operator [MERGEJOIN_213] (rows=4248052806 width=623) + Conds:RS_41._col1=RS_255._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] PartitionCols:_col0 - Select Operator [SEL_242] (rows=462000 width=1436) + Select Operator [SEL_254] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_241] (rows=462000 width=1436) + Filter Operator [FIL_253] (rows=462000 width=288) predicate:i_item_sk is not null - TableScan [TS_29] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + TableScan [TS_29] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_212] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_212] (rows=4248052806 width=343) Conds:RS_38._col1, _col2, _col4=RS_39._col8, _col9, _col10(Inner),Output:["_col1","_col3","_col5","_col12","_col20"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col8, _col9, _col10 - Merge Join Operator [MERGEJOIN_211] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_211] (rows=1893811716 width=235) Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] - <-Reducer 13 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_26] - 
PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=63350266 width=77) - Conds:RS_234._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] - PartitionCols:_col0 - Select Operator [SEL_220] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_217] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0 - Select Operator [SEL_233] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_232] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] + PARTITION_ONLY_SHUFFLE [RS_25] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=316788826 width=135) - Conds:RS_265._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_209] (rows=54418158 width=119) + Conds:RS_245._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_219] (rows=36524 width=1119) + Select Operator [SEL_219] (rows=351 width=12) Output:["_col0"] - Filter Operator [FIL_216] (rows=36524 width=1119) + Filter Operator [FIL_216] (rows=351 width=12) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) - Please refer to the previous TableScan [TS_3] - <-Map 16 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_264] (rows=287989836 width=135) + Select Operator [SEL_244] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_263] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_243] (rows=285117831 width=123) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN 
DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_237] Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_108] Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_106] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_106] (rows=9402909 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_240] - Group By Operator [GBY_238] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_210] (rows=9402909 width=100) 
+ Conds:RS_234._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=351 width=12) + Output:["_col0"] + Filter Operator [FIL_217] (rows=351 width=12) + predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) + Please refer to the previous TableScan [TS_3] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=53632139 width=123) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_232] (rows=53632139 width=123) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_12] (rows=57591150 width=123) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_242] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_123] Group By Operator [GBY_122] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_121] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_210] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_249] - Group By Operator [GBY_247] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_244] (rows=462000 width=1436) + Select Operator [SEL_121] (rows=9402909 width=6) Output:["_col0"] - Please refer to the previous Select Operator [SEL_242] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Group By Operator [GBY_261] (rows=1 width=12) + Please refer to the previous Merge Join Operator [MERGEJOIN_210] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_229] Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_224] (rows=36524 width=1119) + Select Operator [SEL_224] (rows=351 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_219] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_26] + PartitionCols:_col1, _col2 + Please refer to the previous Merge Join Operator [MERGEJOIN_210] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=633595212 width=88) - Conds:RS_260._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_208] (rows=13737330 width=8) + Conds:RS_252._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_218] (rows=18262 width=1119) + Select Operator [SEL_218] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_215] (rows=18262 width=1119) + Filter Operator [FIL_215] (rows=50 width=12) predicate:((d_moy = 
4) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] + SHUFFLE [RS_252] PartitionCols:_col0 - Select Operator [SEL_259] (rows=575995635 width=88) + Select Operator [SEL_251] (rows=501694138 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_258] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_250] (rows=501694138 width=126) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_min) 
AND DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_25_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_236] Please refer to the previous Group By Operator [GBY_235] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Please refer to the previous Group By Operator [GBY_238] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_248] - Please refer to the previous Group By Operator [GBY_247] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_257] - Group By Operator [GBY_256] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_241] + Please refer to the previous Group By Operator [GBY_240] + 
<-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_247] + Group By Operator [GBY_246] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_111] (rows=54418158 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] - Group By Operator [GBY_254] (rows=1 width=12) + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_128] + Group By Operator [GBY_127] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_253] (rows=1704 width=1910) + Select Operator [SEL_126] (rows=54418158 width=7) Output:["_col0"] - Please refer to the previous Select Operator [SEL_251] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_231] Group By Operator [GBY_230] (rows=1 width=12) @@ -322,7 +319,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_228] Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=18262 width=1119) + Select Operator [SEL_222] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_218] diff --git 
a/ql/src/test/results/clientpositive/perf/tez/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/query26.q.out index c8d9a108b7..7c42069758 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query26.q.out @@ -53,14 +53,12 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -70,86 +68,86 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_140] - Limit [LIM_139] (rows=100 width=135) + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_138] (rows=210822976 width=135) + Select Operator [SEL_128] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] - Select Operator [SEL_136] (rows=210822976 width=135) + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_135] (rows=210822976 width=135) + Group By Operator [GBY_125] (rows=310774 width=476) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 - Group By Operator [GBY_28] (rows=421645953 width=135) + Group By Operator [GBY_28] (rows=462000 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col18 - Top N Key Operator [TNK_55] (rows=421645953 width=135) + Top N Key Operator [TNK_55] (rows=809521 width=100) keys:_col18,sort order:+,top n:100 - Merge Join Operator [MERGEJOIN_99] (rows=421645953 width=135) - Conds:RS_24._col2=RS_126._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + Merge Join Operator [MERGEJOIN_99] (rows=809521 width=100) + Conds:RS_24._col2=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_125] (rows=462000 width=1436) + Select Operator [SEL_123] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=462000 width=1436) + Filter Operator [FIL_122] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_12] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_98] (rows=383314495 width=135) - Conds:RS_21._col3=RS_118._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + Merge Join Operator 
[MERGEJOIN_98] (rows=809521 width=4) + Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_121] PartitionCols:_col0 - Select Operator [SEL_117] (rows=2300 width=1179) + Select Operator [SEL_120] (rows=2300 width=174) Output:["_col0"] - Filter Operator [FIL_116] (rows=2300 width=1179) + Filter Operator [FIL_119] (rows=2300 width=174) predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + TableScan [TS_9] (rows=2300 width=174) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_97] (rows=809521 width=4) Conds:RS_18._col0=RS_110._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_109] (rows=36524 width=1119) + Select Operator [SEL_109] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_108] (rows=36524 width=1119) + Filter Operator [FIL_108] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=316788826 width=135) - Conds:RS_134._col1=RS_102._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_96] (rows=2283326 width=135) + 
Conds:RS_118._col1=RS_102._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_101] (rows=232725 width=385) + Select Operator [SEL_101] (rows=14776 width=265) Output:["_col0"] - Filter Operator [FIL_100] (rows=232725 width=385) + Filter Operator [FIL_100] (rows=14776 width=268) predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] + SHUFFLE [RS_118] PartitionCols:_col1 - Select Operator [SEL_133] (rows=287989836 width=135) + Select Operator [SEL_117] (rows=283691050 width=354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_132] (rows=287989836 width=135) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (cs_promo_sk BETWEEN DynamicValue(RS_22_promotion_p_promo_sk_min) AND DynamicValue(RS_22_promotion_p_promo_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_22_promotion_p_promo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND 
DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] + Filter Operator [FIL_116] (rows=283691050 width=354) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=354) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_115] Group By Operator [GBY_114] (rows=1 width=12) @@ -158,31 +156,9 @@ Stage-0 SHUFFLE [RS_113] Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_111] (rows=36524 width=1119) + Select Operator [SEL_111] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_109] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_119] (rows=2300 width=1179) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_117] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_127] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_125] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_107] Group By Operator [GBY_106] (rows=1 width=12) @@ -191,7 +167,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_105] Group By Operator [GBY_104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=232725 width=385) + Select Operator [SEL_103] (rows=14776 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_101] diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out index 59e3c23f50..59cca4f94f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -57,10 +57,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -74,88 +73,88 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_141] - Limit [LIM_140] (rows=100 width=88) + File Output Operator [FS_136] + Limit [LIM_135] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_139] (rows=1264972921 width=88) + Select Operator [SEL_134] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - Select Operator [SEL_137] (rows=1264972921 width=88) + SHUFFLE [RS_133] + Select Operator [SEL_132] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_136] (rows=1264972921 width=88) + Group By Operator [GBY_131] (rows=4281825 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_29] (rows=2529945843 width=88) + Group By Operator [GBY_29] (rows=4281825 width=570) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_56] (rows=843315281 width=88) + Top N Key Operator [TNK_56] (rows=1427275 width=186) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_27] (rows=843315281 width=88) + Select Operator [SEL_27] (rows=1427275 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=843315281 width=88) - Conds:RS_24._col1=RS_127._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] + Merge Join Operator [MERGEJOIN_100] (rows=1427275 width=186) + Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_126] (rows=462000 width=1436) + Select Operator [SEL_129] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_125] (rows=462000 width=1436) + Filter Operator [FIL_128] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_12] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_99] (rows=1427275 width=90) Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_118] (rows=1704 width=1910) + Select Operator [SEL_118] (rows=209 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_117] 
(rows=1704 width=1910) + Filter Operator [FIL_117] (rows=209 width=90) predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + TableScan [TS_9] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_98] (rows=1441779 width=4) Conds:RS_18._col0=RS_111._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_110] (rows=36524 width=1119) + Select Operator [SEL_110] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_109] (rows=36524 width=1119) + Filter Operator [FIL_109] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88) - Conds:RS_135._col2=RS_103._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_97] (rows=4037920 width=4) + Conds:RS_127._col2=RS_103._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=232725 width=385) + Select Operator [SEL_102] (rows=14776 width=269) Output:["_col0"] - Filter Operator [FIL_101] (rows=232725 width=385) + Filter Operator [FIL_101] (rows=14776 width=268) 
predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + SHUFFLE [RS_127] PartitionCols:_col2 - Select Operator [SEL_134] (rows=575995635 width=88) + Select Operator [SEL_126] (rows=501690006 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_133] (rows=575995635 width=88) - predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + Filter Operator [FIL_125] (rows=501690006 width=340) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=340) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_116] Group By Operator [GBY_115] (rows=1 width=12) @@ -164,7 +163,7 @@ Stage-0 SHUFFLE [RS_114] Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_112] (rows=36524 width=1119) + Select Operator [SEL_112] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_110] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -175,20 +174,9 @@ Stage-0 SHUFFLE [RS_122] Group By Operator [GBY_121] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - 
Select Operator [SEL_120] (rows=1704 width=1910) + Select Operator [SEL_120] (rows=209 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_118] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_128] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_126] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_108] Group By Operator [GBY_107] (rows=1 width=12) @@ -197,7 +185,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_106] Group By Operator [GBY_105] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=232725 width=385) + Select Operator [SEL_104] (rows=14776 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_102] diff --git a/ql/src/test/results/clientpositive/perf/tez/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/query28.q.out index a26f01c501..c7fd970eae 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query28.q.out @@ -1,4 +1,8 @@ -Warning: Shuffle Join MERGEJOIN[94][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join 
MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select * from (select avg(ss_list_price) B1_LP @@ -115,158 +119,178 @@ Reducer 11 <- Map 1 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 1 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 1 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 1 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 - File Output Operator [FS_51] - Limit [LIM_50] (rows=1 width=1393) + Reducer 8 + File Output Operator [FS_59] + Limit [LIM_58] (rows=1 width=768) Number of rows:100 - Select Operator [SEL_49] (rows=1 width=1393) + Select Operator [SEL_57] (rows=1 width=768) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Merge Join Operator [MERGEJOIN_94] (rows=1 width=1393) - Conds:(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_142] - Select Operator [SEL_141] (rows=1 width=232) + Merge Join Operator [MERGEJOIN_106] (rows=1 width=768) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_166] + Select Operator [SEL_165] (rows=1 width=128) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_140] (rows=1 width=232) + Group By Operator [GBY_164] (rows=1 width=128) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_139] - Group By Operator [GBY_138] (rows=1 width=232) + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=128) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_137] (rows=191998545 width=88) + Group By Operator [GBY_161] (rows=7618989 width=226) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + SHUFFLE [RS_130] PartitionCols:_col0 - Group By Operator [GBY_110] (rows=191998545 width=88) + Group By Operator [GBY_124] (rows=7618989 width=226) 
Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_104] (rows=191998545 width=88) + Select Operator [SEL_118] (rows=15237978 width=324) Output:["ss_list_price"] - Filter Operator [FIL_98] (rows=191998545 width=88) - predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_148] - Select Operator [SEL_147] (rows=1 width=232) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_146] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_145] - Group By Operator [GBY_144] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_143] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] - PartitionCols:_col0 - Group By Operator [GBY_111] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_105] (rows=191998545 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_99] (rows=191998545 width=88) - predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) - Please refer to the previous TableScan [TS_0] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_154] - Select Operator [SEL_153] (rows=1 width=232) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_152] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_149] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - PartitionCols:_col0 - Group By Operator [GBY_112] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_106] (rows=191998545 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_100] (rows=191998545 width=88) + Filter Operator [FIL_112] (rows=15237978 width=324) predicate:((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) - Please refer to the previous TableScan [TS_0] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - Select Operator [SEL_123] (rows=1 width=232) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_122] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_119] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] - PartitionCols:_col0 - Group By Operator [GBY_107] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_101] (rows=191998545 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_95] (rows=191998545 width=88) - predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) - Please refer to the previous TableScan [TS_0] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_130] - Select Operator [SEL_129] (rows=1 width=232) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_128] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_125] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] - PartitionCols:_col0 - Group By Operator [GBY_108] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_102] (rows=191998545 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_96] (rows=191998545 width=88) - predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) - Please refer to the previous TableScan [TS_0] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_136] - Select Operator [SEL_135] (rows=1 width=232) - 
Output:["_col0","_col1","_col2"] - Group By Operator [GBY_134] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_133] - Group By Operator [GBY_132] (rows=1 width=232) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_131] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] - PartitionCols:_col0 - Group By Operator [GBY_109] (rows=191998545 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_103] (rows=191998545 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_97] (rows=191998545 width=88) - predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) - Please refer to the previous TableScan [TS_0] + TableScan [TS_0] (rows=575995635 width=324) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_54] + Merge Join Operator [MERGEJOIN_105] (rows=1 width=640) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_160] + Select Operator [SEL_159] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_158] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_155] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Group By Operator [GBY_123] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_117] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_111] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) + Please refer to the previous TableScan [TS_0] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Merge Join Operator [MERGEJOIN_104] (rows=1 width=512) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_154] + Select Operator [SEL_153] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_152] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_149] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Group By Operator [GBY_122] (rows=7618989 
width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_116] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_110] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) + Please refer to the previous TableScan [TS_0] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_103] (rows=1 width=384) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_148] + Select Operator [SEL_147] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_146] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_143] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col0 + Group By Operator [GBY_121] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_115] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_109] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) + Please refer to the previous TableScan [TS_0] + 
<-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_45] + Merge Join Operator [MERGEJOIN_102] (rows=1 width=256) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_142] + Select Operator [SEL_141] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_140] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_137] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Group By Operator [GBY_120] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_114] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_108] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) + Please refer to the previous TableScan [TS_0] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_136] + Select Operator [SEL_135] (rows=1 width=128) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_134] (rows=1 width=128) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Group By Operator [GBY_132] (rows=1 width=128) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_131] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_119] (rows=7618989 width=226) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_113] (rows=15237978 width=324) + Output:["ss_list_price"] + Filter Operator [FIL_107] (rows=15237978 width=324) + predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) + Please refer to the previous TableScan [TS_0] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index b291bb4ba3..a21c3c789e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -107,226 +107,226 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 8 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) -Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 20 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 10 <- Reducer 16 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 
(CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_260] - Limit [LIM_259] (rows=100 width=88) + File Output Operator [FS_254] + Limit [LIM_253] (rows=100 width=496) Number of rows:100 - Select Operator [SEL_258] (rows=463823414 width=88) + Select Operator [SEL_252] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - Group By Operator [GBY_256] (rows=463823414 width=88) + SHUFFLE [RS_251] + Group By Operator [GBY_250] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=927646829 width=88) + Group By Operator [GBY_48] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28 - Top N Key Operator [TNK_93] (rows=927646829 width=88) + Top N Key Operator [TNK_93] (rows=4156223234 width=483) keys:_col7, _col8, _col27, _col28,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_205] (rows=927646829 width=88) + Merge Join Operator [MERGEJOIN_205] (rows=4156223234 width=483) Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_44] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_199] (rows=7638375 width=10) + Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=1957 width=8) + Output:["_col0"] + Filter Operator 
[FIL_206] (rows=1957 width=8) + predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_214] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_209] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col14, _col13 - Select Operator [SEL_40] (rows=843315281 width=88) + Select Operator [SEL_40] (rows=21091879 width=484) Output:["_col1","_col2","_col8","_col13","_col14","_col16","_col21","_col22"] - Merge Join Operator [MERGEJOIN_204] (rows=843315281 width=88) - Conds:RS_37._col3=RS_247._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] - 
<-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_247] + Merge Join Operator [MERGEJOIN_204] (rows=21091879 width=484) + Conds:RS_37._col3=RS_249._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] PartitionCols:_col0 - Select Operator [SEL_246] (rows=1704 width=1910) + Select Operator [SEL_248] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_245] (rows=1704 width=1910) + Filter Operator [FIL_247] (rows=1704 width=192) predicate:s_store_sk is not null - TableScan [TS_25] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_25] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=766650239 width=88) - Conds:RS_34._col1=RS_239._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] - <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_239] + Merge Join Operator [MERGEJOIN_203] (rows=21091879 width=298) + Conds:RS_34._col1=RS_246._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] PartitionCols:_col0 - Select Operator [SEL_238] (rows=462000 width=1436) + Select Operator [SEL_245] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_237] (rows=462000 width=1436) + Filter Operator [FIL_244] (rows=462000 width=288) predicate:i_item_sk is not null - TableScan [TS_22] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 10 [SIMPLE_EDGE] + TableScan [TS_22] (rows=462000 width=288) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_202] (rows=21091879 width=18) Conds:RS_31._col1, _col2, _col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col10","_col11","_col13"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_32] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_201] (rows=63350266 width=77) + Merge Join Operator [MERGEJOIN_201] (rows=5384572 width=13) Conds:RS_230._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized + <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_220] (rows=36524 width=1119) + Select Operator [SEL_220] (rows=201 width=12) Output:["_col0"] - Filter Operator [FIL_218] (rows=36524 width=1119) + Filter Operator [FIL_218] (rows=201 width=12) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 19 [SIMPLE_EDGE] vectorized + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_229] (rows=57591150 width=77) + Select Operator [SEL_229] (rows=53632139 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_228] (rows=57591150 width=77) + Filter Operator [FIL_228] (rows=53632139 width=19) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=77) - 
default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 9 [SIMPLE_EDGE] + TableScan [TS_12] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_200] (rows=633595212 width=88) - Conds:RS_255._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_200] (rows=13737330 width=8) + Conds:RS_243._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_219] (rows=18262 width=1119) + Select Operator [SEL_219] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_217] (rows=18262 width=1119) + Filter Operator [FIL_217] (rows=50 width=12) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_9] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] PartitionCols:_col0 - Select Operator [SEL_254] (rows=575995635 width=88) + Select Operator [SEL_242] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_253] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_32_store_returns_sr_customer_sk_min) AND DynamicValue(RS_32_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_32_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_32_store_returns_sr_item_sk_min) AND DynamicValue(RS_32_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_32_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_35_item_i_item_sk_min) AND DynamicValue(RS_35_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_35_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_38_store_s_store_sk_min) AND DynamicValue(RS_38_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_38_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_32_store_returns_sr_ticket_number_min) AND DynamicValue(RS_32_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_32_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_241] (rows=501694138 width=23) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_32_store_returns_sr_customer_sk_min) AND DynamicValue(RS_32_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_32_store_returns_sr_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_44_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_44_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_44_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_32_store_returns_sr_item_sk_min) AND DynamicValue(RS_32_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_32_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_44_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_32_store_returns_sr_ticket_number_min) AND DynamicValue(RS_32_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_32_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_227] Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_225] Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=18262 width=1119) + Select Operator [SEL_222] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_219] - <-Reducer 16 [BROADCAST_EDGE] vectorized + <-Reducer 18 [BROADCAST_EDGE] vectorized BROADCAST [RS_232] Group By Operator [GBY_231] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_121] Group By Operator [GBY_120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_119] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_119] (rows=5384572 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 17 [BROADCAST_EDGE] vectorized + <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_234] Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_126] Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_124] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_124] (rows=5384572 width=5) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 18 [BROADCAST_EDGE] vectorized + <-Reducer 20 [BROADCAST_EDGE] vectorized BROADCAST [RS_236] Group By 
Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3507020)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_131] Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_129] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3507020)"] + Select Operator [SEL_129] (rows=5384572 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_201] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_242] - Group By Operator [GBY_241] (rows=1 width=12) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_240] (rows=462000 width=1436) + Select Operator [SEL_144] (rows=7638375 width=6) Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_252] - Group By Operator [GBY_251] (rows=1 width=12) + Please refer to the previous Merge Join Operator [MERGEJOIN_199] + <-Reducer 7 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_248] (rows=1704 width=1910) + Select Operator [SEL_149] (rows=7638375 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_246] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_199] (rows=316788826 width=135) - Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_208] - PartitionCols:_col0 - Select Operator [SEL_207] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_206] (rows=73049 width=1119) - predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] - PartitionCols:_col0 - Select Operator [SEL_215] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_214] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_209] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_207] + Please refer to the previous Merge Join Operator [MERGEJOIN_199] diff --git a/ql/src/test/results/clientpositive/perf/tez/query3.q.out b/ql/src/test/results/clientpositive/perf/tez/query3.q.out index f96940bce8..d4296cf3b2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query3.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query3.q.out @@ -63,53 +63,53 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_77] - Limit [LIM_76] (rows=100 width=88) + Limit [LIM_76] (rows=100 width=220) Number of rows:100 - Select Operator [SEL_75] (rows=348477374 width=88) + Select Operator [SEL_75] (rows=274400 width=220) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_74] - Group By Operator [GBY_73] (rows=348477374 width=88) + Group By Operator [GBY_73] (rows=274400 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=274400 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col8, _col4, _col5 - 
Merge Join Operator [MERGEJOIN_53] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_53] (rows=589741 width=108) Conds:RS_12._col0=RS_64._col0(Inner),Output:["_col2","_col4","_col5","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_63] (rows=36524 width=1119) + Select Operator [SEL_63] (rows=5619 width=12) Output:["_col0","_col1"] - Filter Operator [FIL_62] (rows=36524 width=1119) + Filter Operator [FIL_62] (rows=5619 width=12) predicate:((d_moy = 12) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,dt,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_52] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_52] (rows=7666836 width=104) Conds:RS_72._col1=RS_56._col0(Inner),Output:["_col0","_col2","_col4","_col5"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_55] (rows=231000 width=1436) + Select Operator [SEL_55] (rows=669 width=111) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=231000 width=1436) + Filter Operator [FIL_54] (rows=669 width=111) predicate:((i_manufact_id = 436) and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id"] + TableScan [TS_3] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_72] PartitionCols:_col1 - Select Operator [SEL_71] (rows=575995635 width=88) + Select Operator [SEL_71] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_70] (rows=575995635 
width=88) + Filter Operator [FIL_70] (rows=550076554 width=114) predicate:((ss_item_sk BETWEEN DynamicValue(RS_10_item_i_item_sk_min) AND DynamicValue(RS_10_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_10_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_dt_d_date_sk_min) AND DynamicValue(RS_13_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_dt_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_61] Group By Operator [GBY_60] (rows=1 width=12) @@ -118,7 +118,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_59] Group By Operator [GBY_58] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_57] (rows=231000 width=1436) + Select Operator [SEL_57] (rows=669 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_55] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -129,7 +129,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_67] Group By Operator [GBY_66] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_65] (rows=36524 width=1119) + Select Operator [SEL_65] (rows=5619 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_63] diff --git a/ql/src/test/results/clientpositive/perf/tez/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/query30.q.out index 1233c23cc4..9f9559e1e4 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query30.q.out @@ -88,130 +88,130 @@ Stage-0 Stage-1 Reducer 4 vectorized File Output Operator [FS_210] - Limit [LIM_209] (rows=100 width=860) + Limit [LIM_209] (rows=100 width=942) Number of rows:100 - Select Operator [SEL_208] (rows=96800003 width=860) + Select Operator [SEL_208] (rows=691171 width=942) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_63] - Select Operator [SEL_62] (rows=96800003 width=860) + Select Operator [SEL_62] (rows=691171 width=942) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Merge Join Operator [MERGEJOIN_177] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_177] (rows=691171 width=942) Conds:RS_59._col0=RS_60._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col18"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_171] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_171] (rows=1568628 width=834) Conds:RS_180._col2=RS_187._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_187] PartitionCols:_col0 - Select Operator [SEL_184] (rows=20000000 width=1014) + Select Operator [SEL_184] (rows=784314 width=90) Output:["_col0"] - Filter Operator [FIL_181] (rows=20000000 width=1014) + Filter Operator [FIL_181] (rows=784314 width=90) predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + TableScan [TS_3] (rows=40000000 width=90) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_180] PartitionCols:_col2 - Select Operator [SEL_179] (rows=80000000 width=860) + Select Operator [SEL_179] (rows=80000000 width=849) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_178] (rows=80000000 width=860) + Filter Operator [FIL_178] (rows=80000000 width=849) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] + TableScan [TS_0] (rows=80000000 width=849) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Select Operator [SEL_55] (rows=8066666 width=1014) + Select Operator [SEL_55] (rows=704993 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_54] (rows=8066666 width=1014) + Filter Operator [FIL_54] (rows=704993 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_176] (rows=24200000 width=1014) + Merge Join Operator [MERGEJOIN_176] (rows=2114980 width=227) Conds:RS_202._col1=RS_207._col1(Inner),Output:["_col0","_col2","_col3"] <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_207] PartitionCols:_col1 - Select Operator [SEL_206] (rows=11000000 width=1014) + Select Operator [SEL_206] (rows=6 width=198) Output:["_col0","_col1"] - Group By 
Operator [GBY_205] (rows=11000000 width=1014) + Group By Operator [GBY_205] (rows=6 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_204] (rows=22000000 width=1014) + Select Operator [SEL_204] (rows=2537976 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_203] (rows=22000000 width=1014) + Group By Operator [GBY_203] (rows=2537976 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=44000000 width=1014) + Group By Operator [GBY_42] (rows=3923529 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Merge Join Operator [MERGEJOIN_175] (rows=44000000 width=1014) + Merge Join Operator [MERGEJOIN_175] (rows=3923529 width=184) Conds:RS_38._col2=RS_189._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_186] (rows=40000000 width=1014) + Select Operator [SEL_186] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_183] (rows=40000000 width=1014) + Filter Operator [FIL_183] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) Please refer to the previous TableScan [TS_3] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_174] (rows=15838314 width=92) + Merge Join Operator [MERGEJOIN_174] (rows=3923529 width=101) Conds:RS_195._col0=RS_199._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_193] (rows=14398467 width=92) + Select Operator [SEL_193] (rows=13130761 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_191] (rows=14398467 width=92) + Filter Operator [FIL_191] (rows=13130761 width=118) 
predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - TableScan [TS_6] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] + TableScan [TS_6] (rows=14398467 width=118) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_199] PartitionCols:_col0 - Select Operator [SEL_197] (rows=36524 width=1119) + Select Operator [SEL_197] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_196] (rows=36524 width=1119) + Filter Operator [FIL_196] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_202] PartitionCols:_col1 - Select Operator [SEL_201] (rows=22000000 width=1014) + Select Operator [SEL_201] (rows=2114980 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_200] (rows=22000000 width=1014) + Group By Operator [GBY_200] (rows=2114980 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=44000000 width=1014) + Group By Operator [GBY_22] (rows=3746772 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Merge Join Operator [MERGEJOIN_173] (rows=44000000 width=1014) + Merge Join Operator [MERGEJOIN_173] (rows=3746772 width=184) Conds:RS_18._col2=RS_188._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_188] 
PartitionCols:_col0 - Select Operator [SEL_185] (rows=40000000 width=1014) + Select Operator [SEL_185] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_182] (rows=40000000 width=1014) + Filter Operator [FIL_182] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) Please refer to the previous TableScan [TS_3] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_172] (rows=15838314 width=92) + Merge Join Operator [MERGEJOIN_172] (rows=3746772 width=101) Conds:RS_194._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_192] (rows=14398467 width=92) + Select Operator [SEL_192] (rows=12539215 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_190] (rows=14398467 width=92) + Filter Operator [FIL_190] (rows=12539215 width=118) predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) Please refer to the previous TableScan [TS_6] <-Map 14 [SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/query31.q.out index 0f22f520b5..32d06b6db1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query31.q.out @@ -113,426 +113,372 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 30 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 36 <- Reducer 11 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE) -Map 37 <- Reducer 15 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE) -Map 38 <- Reducer 20 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE) -Map 39 <- Reducer 24 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE) -Map 40 <- Reducer 28 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 37 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 13 <- Map 29 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 38 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 17 <- Map 29 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 23 (ONE_TO_ONE_EDGE), Reducer 27 (ONE_TO_ONE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 20 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 39 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 40 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 26 <- Map 29 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 29 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 33 <- Reducer 13 (BROADCAST_EDGE) +Map 34 <- Reducer 17 (BROADCAST_EDGE) +Map 35 <- Reducer 23 (BROADCAST_EDGE) +Map 36 <- Reducer 27 (BROADCAST_EDGE) +Map 
37 <- Reducer 31 (BROADCAST_EDGE) +Reducer 10 <- Map 33 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Map 32 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 34 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 15 <- Map 32 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 35 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 19 <- Map 32 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (ONE_TO_ONE_EDGE), Reducer 26 (ONE_TO_ONE_EDGE) +Reducer 22 <- Reducer 21 (ONE_TO_ONE_EDGE), Reducer 30 (ONE_TO_ONE_EDGE) +Reducer 23 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 36 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 25 <- Map 32 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 37 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 29 <- Map 32 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 32 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 8 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 36 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 9 <- Map 29 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 16 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 - File Output Operator [FS_133] - Select Operator [SEL_132] 
(rows=287493839 width=88) + Reducer 7 + File Output Operator [FS_139] + Select Operator [SEL_138] (rows=110 width=550) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_130] (rows=287493839 width=88) + Filter Operator [FIL_136] (rows=110 width=770) predicate:(CASE WHEN ((_col1 > 0)) THEN (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > (_col5 / _col1))) ELSE ((null > (_col5 / _col1))) END) ELSE (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > null)) ELSE (null) END) END and CASE WHEN ((_col3 > 0)) THEN (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > (_col1 / _col3))) ELSE ((null > (_col1 / _col3))) END) ELSE (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > null)) ELSE (null) END) END) - Merge Join Operator [MERGEJOIN_448] (rows=1149975359 width=88) - Conds:RS_519._col0=RS_528._col0(Inner),RS_519._col0=RS_537._col0(Inner),RS_519._col0=RS_128._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col7","_col9","_col11"] - <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_528] + Merge Join Operator [MERGEJOIN_450] (rows=440 width=770) + Conds:RS_133._col0=RS_134._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col7","_col9","_col11"] + <-Reducer 22 [ONE_TO_ONE_EDGE] + FORWARD [RS_134] PartitionCols:_col0 - Group By Operator [GBY_527] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_37] + Merge Join Operator [MERGEJOIN_449] (rows=440 width=434) + Conds:RS_123._col0=RS_536._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 21 [ONE_TO_ONE_EDGE] + FORWARD [RS_123] PartitionCols:_col0 - Group By Operator [GBY_36] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_438] (rows=696954748 width=88) - Conds:RS_32._col1=RS_491._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_491] - PartitionCols:_col0 - Select 
Operator [SEL_488] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_487] (rows=40000000 width=1014) - predicate:(ca_address_sk is not null and ca_county is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_437] (rows=633595212 width=88) - Conds:RS_526._col0=RS_463._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_463] - PartitionCols:_col0 - Select Operator [SEL_456] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_450] (rows=18262 width=1119) - predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] - PartitionCols:_col0 - Select Operator [SEL_525] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_524] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_33_customer_address_ca_address_sk_min) AND DynamicValue(RS_33_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_33_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_20] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_521] - Group By Operator [GBY_520] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_480] - Group By Operator [GBY_474] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_464] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_456] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_523] - Group By Operator [GBY_522] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] - Group By Operator [GBY_502] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_492] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_537] - PartitionCols:_col0 - Group By Operator [GBY_536] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col0 - Group By Operator [GBY_56] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_440] (rows=696954748 width=88) - Conds:RS_52._col1=RS_493._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_493] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_488] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_439] (rows=633595212 width=88) - Conds:RS_535._col0=RS_465._col0(Inner),Output:["_col1","_col2"] - <-Map 6 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_465] - PartitionCols:_col0 - Select Operator [SEL_457] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_451] (rows=18262 width=1119) - predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_535] - PartitionCols:_col0 - Select Operator [SEL_534] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_533] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_40] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_530] - Group By Operator [GBY_529] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_481] - Group By Operator [GBY_475] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_466] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_457] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_532] - Group By Operator [GBY_531] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_509] - Group By Operator [GBY_503] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_494] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 19 [ONE_TO_ONE_EDGE] - FORWARD [RS_128] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_447] (rows=191667561 width=135) - Conds:RS_546._col0=RS_555._col0(Inner),RS_546._col0=RS_564._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_546] + Merge Join Operator [MERGEJOIN_448] (rows=440 width=322) + Conds:RS_522._col0=RS_529._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_522] + PartitionCols:_col0 + Group By Operator [GBY_521] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_441] (rows=10246882 width=209) + Conds:RS_72._col1=RS_497._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_497] + PartitionCols:_col0 + Select Operator [SEL_493] (rows=40000000 width=102) + Output:["_col0","_col1"] + Filter Operator [FIL_492] (rows=40000000 width=102) + predicate:(ca_address_sk is not null and ca_county is not null) + TableScan [TS_6] (rows=40000000 width=102) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Merge Join 
Operator [MERGEJOIN_440] (rows=10246882 width=115) + Conds:RS_520._col0=RS_469._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_469] + PartitionCols:_col0 + Select Operator [SEL_460] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_454] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_520] + PartitionCols:_col0 + Select Operator [SEL_519] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_518] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_60] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_517] + Group By Operator [GBY_516] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_484] + Group By Operator [GBY_478] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_470] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_460] + <-Reducer 26 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_529] + PartitionCols:_col0 + Group By Operator [GBY_528] (rows=440 width=210) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col0 + Group By Operator [GBY_96] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_443] (rows=10246882 width=209) + Conds:RS_92._col1=RS_498._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_498] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_493] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_442] (rows=10246882 width=115) + Conds:RS_527._col0=RS_471._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_471] + PartitionCols:_col0 + Select Operator [SEL_461] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_455] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_527] + PartitionCols:_col0 + Select Operator [SEL_526] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_525] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_80] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_524] + Group By Operator [GBY_523] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 
[CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_485] + Group By Operator [GBY_479] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_472] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_461] + <-Reducer 30 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_536] PartitionCols:_col0 - Group By Operator [GBY_545] (rows=87121617 width=135) + Group By Operator [GBY_535] (rows=440 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=174243235 width=135) + Group By Operator [GBY_116] (rows=3960 width=210) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_442] (rows=174243235 width=135) - Conds:RS_72._col1=RS_495._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_495] + Merge Join Operator [MERGEJOIN_445] (rows=10246882 width=209) + Conds:RS_112._col1=RS_499._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_499] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_488] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_72] + Please refer to the previous Select Operator [SEL_493] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_441] (rows=158402938 width=135) - Conds:RS_544._col0=RS_467._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_467] + Merge Join Operator [MERGEJOIN_444] (rows=10246882 width=115) + Conds:RS_534._col0=RS_473._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_473] PartitionCols:_col0 - Select Operator [SEL_458] (rows=18262 width=1119) + Select 
Operator [SEL_462] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_452] (rows=18262 width=1119) - predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) + Filter Operator [FIL_456] (rows=130 width=12) + predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_544] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_534] PartitionCols:_col0 - Select Operator [SEL_543] (rows=144002668 width=135) + Select Operator [SEL_533] (rows=143931246 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_542] (rows=144002668 width=135) - predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_73_customer_address_ca_address_sk_min) AND DynamicValue(RS_73_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_73_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_60] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_539] - Group By Operator [GBY_538] (rows=1 width=12) + Filter Operator [FIL_532] (rows=143931246 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_100] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + 
<-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_531] + Group By Operator [GBY_530] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_482] - Group By Operator [GBY_476] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_486] + Group By Operator [GBY_480] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_468] (rows=18262 width=1119) + Select Operator [SEL_474] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_458] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_541] - Group By Operator [GBY_540] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] - Group By Operator [GBY_504] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_496] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 23 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_555] + Please refer to the previous Select Operator [SEL_462] + <-Reducer 6 [ONE_TO_ONE_EDGE] + FORWARD [RS_133] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_447] (rows=1605 width=434) + Conds:RS_130._col0=RS_515._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_515] PartitionCols:_col0 - Group By Operator [GBY_554] (rows=87121617 width=135) + Group By Operator [GBY_514] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - 
<-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_57] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=174243235 width=135) + Group By Operator [GBY_56] (rows=33705 width=210) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_444] (rows=174243235 width=135) - Conds:RS_92._col1=RS_497._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_497] + Merge Join Operator [MERGEJOIN_439] (rows=37399561 width=139) + Conds:RS_52._col1=RS_496._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_496] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_488] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_92] + Please refer to the previous Select Operator [SEL_493] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_443] (rows=158402938 width=135) - Conds:RS_553._col0=RS_469._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_469] + Merge Join Operator [MERGEJOIN_438] (rows=37399561 width=42) + Conds:RS_513._col0=RS_467._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_467] PartitionCols:_col0 - Select Operator [SEL_459] (rows=18262 width=1119) + Select Operator [SEL_459] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_453] (rows=18262 width=1119) - predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) + Filter Operator [FIL_453] (rows=130 width=12) + predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_553] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_513] PartitionCols:_col0 - Select Operator [SEL_552] (rows=144002668 width=135) + Select Operator [SEL_512] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter 
Operator [FIL_551] (rows=144002668 width=135) - predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_93_customer_address_ca_address_sk_min) AND DynamicValue(RS_93_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_93_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_80] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_548] - Group By Operator [GBY_547] (rows=1 width=12) + Filter Operator [FIL_511] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_40] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_510] + Group By Operator [GBY_509] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_483] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_483] Group By Operator [GBY_477] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_470] (rows=18262 width=1119) + Select Operator [SEL_468] (rows=130 
width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_459] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_550] - Group By Operator [GBY_549] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_511] - Group By Operator [GBY_505] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_498] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 27 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_564] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_130] PartitionCols:_col0 - Group By Operator [GBY_563] (rows=87121617 width=135) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_446] (rows=1605 width=322) + Conds:RS_501._col0=RS_508._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 12 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_508] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=174243235 width=135) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_446] (rows=174243235 width=135) - Conds:RS_112._col1=RS_499._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_499] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_488] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_445] (rows=158402938 width=135) - Conds:RS_562._col0=RS_471._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_471] + Group By Operator [GBY_507] (rows=1605 width=210) + 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Group By Operator [GBY_36] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_437] (rows=37399561 width=139) + Conds:RS_32._col1=RS_495._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_495] PartitionCols:_col0 - Select Operator [SEL_460] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_454] (rows=18262 width=1119) - predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_562] + Please refer to the previous Select Operator [SEL_493] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_436] (rows=37399561 width=42) + Conds:RS_506._col0=RS_465._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_465] + PartitionCols:_col0 + Select Operator [SEL_458] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_452] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_506] + PartitionCols:_col0 + Select Operator [SEL_505] (rows=525327191 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_504] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_20] (rows=575995635 width=114) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_503] + Group By Operator [GBY_502] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_482] + Group By Operator [GBY_476] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_466] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_501] + PartitionCols:_col0 + Group By Operator [GBY_500] (rows=1605 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_435] (rows=37399561 width=139) + Conds:RS_12._col1=RS_494._col0(Inner),Output:["_col2","_col7"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_494] PartitionCols:_col0 - Select Operator [SEL_561] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_560] (rows=144002668 width=135) - predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_113_customer_address_ca_address_sk_min) AND DynamicValue(RS_113_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_113_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and 
ws_sold_date_sk is not null) - TableScan [TS_100] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_557] - Group By Operator [GBY_556] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_484] - Group By Operator [GBY_478] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_472] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_460] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_559] - Group By Operator [GBY_558] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_512] - Group By Operator [GBY_506] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_500] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_519] - PartitionCols:_col0 - Group By Operator [GBY_518] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_436] (rows=696954748 width=88) - Conds:RS_12._col1=RS_489._col0(Inner),Output:["_col2","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_489] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_488] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_435] (rows=633595212 width=88) - Conds:RS_517._col0=RS_461._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_461] - PartitionCols:_col0 - Select Operator [SEL_455] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_449] (rows=18262 width=1119) - predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] - PartitionCols:_col0 - Select Operator [SEL_516] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_515] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_13_customer_address_ca_address_sk_min) AND DynamicValue(RS_13_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_13_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_514] - Group By Operator [GBY_513] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] - Group By Operator [GBY_501] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - 
Select Operator [SEL_490] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_488] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_486] - Group By Operator [GBY_485] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_479] - Group By Operator [GBY_473] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_462] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_455] + Please refer to the previous Select Operator [SEL_493] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_434] (rows=37399561 width=42) + Conds:RS_491._col0=RS_463._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_463] + PartitionCols:_col0 + Select Operator [SEL_457] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_451] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_491] + PartitionCols:_col0 + Select Operator [SEL_490] (rows=525327191 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_489] (rows=525327191 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 
9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_488] + Group By Operator [GBY_487] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_481] + Group By Operator [GBY_475] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_464] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_457] diff --git a/ql/src/test/results/clientpositive/perf/tez/query32.q.out b/ql/src/test/results/clientpositive/perf/tez/query32.q.out index 7716da9cf3..1c1a2e7b5e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query32.q.out @@ -63,105 +63,148 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 11 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 2 (SIMPLE_EDGE) -Reducer 6 <- Map 9 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), 
Map 6 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_129] - Limit [LIM_128] (rows=1 width=112) + File Output Operator [FS_141] + Limit [LIM_140] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_127] (rows=1 width=112) + Group By Operator [GBY_139] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_36] Group By Operator [GBY_35] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col2)"] - Select Operator [SEL_34] (rows=116155905 width=135) + Select Operator [SEL_34] (rows=2478 width=112) Output:["_col2"] - Filter Operator [FIL_33] (rows=116155905 width=135) + Filter Operator [FIL_33] (rows=2478 width=112) predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) - Merge Join Operator [MERGEJOIN_104] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_104] (rows=7434 width=112) Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col6"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_30] + PARTITION_ONLY_SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_101] (rows=316788826 width=135) - Conds:RS_123._col0=RS_107._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_101] (rows=31836679 width=110) + Conds:RS_128._col0=RS_107._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_106] (rows=8116 width=1119) + Select Operator [SEL_106] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_105] (rows=8116 width=1119) + Filter Operator [FIL_105] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + SHUFFLE [RS_128] PartitionCols:_col0 - Select Operator [SEL_122] (rows=287989836 width=135) + Select Operator [SEL_127] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_121] (rows=287989836 width=135) + Filter Operator [FIL_126] (rows=286549727 width=119) predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - Group By Operator [GBY_117] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_116] (rows=231000 width=1436) + Select Operator [SEL_120] (rows=669 width=4) Output:["_col0"] - Select Operator [SEL_114] (rows=231000 width=1436) + Select Operator [SEL_118] (rows=669 width=8) Output:["_col0"] - Filter Operator [FIL_113] (rows=231000 width=1436) + Filter Operator [FIL_117] (rows=669 width=7) predicate:((i_manufact_id = 269) and i_item_sk is not null) - TableScan [TS_20] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + TableScan [TS_20] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=8116 width=1119) + Select Operator [SEL_108] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_106] - <-Reducer 6 [ONE_TO_ONE_EDGE] + <-Reducer 10 [ONE_TO_ONE_EDGE] FORWARD [RS_31] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_103] (rows=174233858 width=135) - Conds:RS_126._col0=RS_115._col0(Inner),Output:["_col1","_col2"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + Merge Join Operator [MERGEJOIN_103] (rows=97 width=116) + 
Conds:RS_138._col0=RS_119._col0(Inner),Output:["_col1","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_119] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_114] - <-Reducer 5 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_126] + Please refer to the previous Select Operator [SEL_118] + <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_138] PartitionCols:_col0 - Select Operator [SEL_125] (rows=158394413 width=135) + Select Operator [SEL_137] (rows=6951 width=116) Output:["_col0","_col1"] - Group By Operator [GBY_124] (rows=158394413 width=135) + Group By Operator [GBY_136] (rows=6951 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=316788826 width=135) + Group By Operator [GBY_16] (rows=97314 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_101] + Merge Join Operator [MERGEJOIN_102] (rows=31836679 width=110) + Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_106] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + PartitionCols:_col0 + Select Operator [SEL_134] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_133] (rows=286549727 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_30_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_30_catalog_sales_cs_item_sk_max) and in_bloom_filter(cs_item_sk, 
DynamicValue(RS_30_catalog_sales_cs_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_125] + Please refer to the previous Group By Operator [GBY_123] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_110] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 5 [BROADCAST_EDGE] vectorized + BROADCAST [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_89] (rows=31836679 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_101] diff --git a/ql/src/test/results/clientpositive/perf/tez/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/query33.q.out index 
068d198e9e..a76122c4ed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query33.q.out @@ -163,324 +163,288 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 17 <- Reducer 21 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 32 <- Reducer 11 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE) -Map 33 <- Reducer 14 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 11 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Reducer 2 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 14 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 19 <- Map 28 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) -Reducer 23 <- Map 28 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 20 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) -Reducer 26 <- Map 28 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Map 14 <- Reducer 18 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) +Map 29 <- Reducer 21 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE) +Map 30 <- Reducer 24 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Map 12 (SIMPLE_EDGE) 
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_370] - Limit [LIM_369] (rows=100 width=108) + File Output Operator [FS_368] + Limit [LIM_367] (rows=59 width=115) Number of rows:100 - Select Operator [SEL_368] (rows=335408073 width=108) + Select Operator [SEL_366] (rows=59 width=115) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_367] - Group By Operator [GBY_366] (rows=335408073 width=108) + SHUFFLE [RS_365] + Group By Operator [GBY_364] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] - <-Reducer 10 [CONTAINS] vectorized - Reduce Output Operator [RS_382] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_388] PartitionCols:_col0 - Group By Operator [GBY_381] (rows=670816147 width=108) + Group By Operator [GBY_387] (rows=59 
width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_380] (rows=191657247 width=135) + Group By Operator [GBY_386] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_71] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=383314495 width=135) + Group By Operator [GBY_108] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=383314495 width=135) - Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] + Merge Join Operator [MERGEJOIN_304] (rows=11364 width=3) + Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] + SHUFFLE [RS_104] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_293] (rows=508200 width=1436) + Merge Join Operator [MERGEJOIN_293] (rows=461514 width=7) Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_319] PartitionCols:_col1 - Select Operator [SEL_318] (rows=462000 width=1436) + Select Operator [SEL_318] (rows=460848 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_317] (rows=462000 width=1436) + Filter Operator [FIL_317] (rows=460848 width=7) predicate:(i_item_sk is not null and i_manufact_id is not null) - TableScan [TS_0] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] - <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized + TableScan [TS_0] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_325] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=115500 width=1436) + Group By Operator [GBY_324] (rows=692 width=3) Output:["_col0"],keys:KEY._col0 - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 12 [SIMPLE_EDGE] 
vectorized SHUFFLE [RS_323] PartitionCols:_col0 - Group By Operator [GBY_322] (rows=231000 width=1436) + Group By Operator [GBY_322] (rows=692 width=3) Output:["_col0"],keys:i_manufact_id - Select Operator [SEL_321] (rows=231000 width=1436) + Select Operator [SEL_321] (rows=46085 width=93) Output:["i_manufact_id"] - Filter Operator [FIL_320] (rows=231000 width=1436) + Filter Operator [FIL_320] (rows=46085 width=93) predicate:((i_category = 'Books') and i_manufact_id is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"] + TableScan [TS_3] (rows=462000 width=93) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_manufact_id"] <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col4 - Select Operator [SEL_62] (rows=348467716 width=135) - Output:["_col4","_col5"] - Merge Join Operator [MERGEJOIN_298] (rows=348467716 width=135) - Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"] - <-Map 28 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_346] - PartitionCols:_col0 - Select Operator [SEL_343] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_342] (rows=20000000 width=1014) - predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_297] (rows=316788826 width=135) - Conds:RS_379._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_330] - PartitionCols:_col0 - Select Operator [SEL_327] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_326] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=1119) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_379] - PartitionCols:_col0 - Select Operator [SEL_378] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_377] (rows=287989836 width=135) - predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_60_customer_address_ca_address_sk_min) AND DynamicValue(RS_60_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_60_customer_address_ca_address_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_66_item_i_item_sk_min) AND DynamicValue(RS_66_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_66_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_47] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_376] - Group By Operator [GBY_375] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_240] - Group By Operator [GBY_239] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_238] (rows=508200 width=1436) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_293] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_372] - Group By Operator [GBY_371] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_335] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_331] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_327] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_374] - Group By Operator [GBY_373] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_354] - Group By Operator [GBY_351] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_347] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_343] - <-Reducer 13 [CONTAINS] vectorized - Reduce Output Operator [RS_394] - PartitionCols:_col0 - Group By Operator [GBY_393] (rows=670816147 width=108) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_392] (rows=95833781 width=135) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_109] - PartitionCols:_col0 - Group By Operator [GBY_108] (rows=191667562 width=135) - Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=191667562 width=135) - Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_293] - <-Reducer 26 [SIMPLE_EDGE] SHUFFLE 
[RS_105] PartitionCols:_col3 - Select Operator [SEL_100] (rows=174243235 width=135) + Select Operator [SEL_100] (rows=788222 width=110) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_301] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_301] (rows=788222 width=110) Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"] - <-Map 28 [SIMPLE_EDGE] vectorized + <-Map 25 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_348] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_343] - <-Reducer 25 [SIMPLE_EDGE] + Select Operator [SEL_343] (rows=8000000 width=116) + Output:["_col0"] + Filter Operator [FIL_342] (rows=8000000 width=112) + predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) + TableScan [TS_16] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_300] (rows=158402938 width=135) - Conds:RS_391._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 20 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_300] (rows=3941109 width=118) + Conds:RS_385._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_332] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_327] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_391] + Select Operator [SEL_327] (rows=50 width=12) + Output:["_col0"] + Filter Operator [FIL_326] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_385] PartitionCols:_col0 - Select Operator [SEL_390] (rows=144002668 width=135) + Select Operator [SEL_384] (rows=143931246 width=123) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_389] (rows=144002668 width=135) - predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_98_customer_address_ca_address_sk_min) AND DynamicValue(RS_98_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_98_customer_address_ca_address_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_85] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_388] - Group By Operator [GBY_387] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_280] - Group By Operator [GBY_279] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=508200 width=1436) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_293] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_384] - Group By Operator [GBY_383] (rows=1 width=12) + Filter Operator [FIL_383] (rows=143931246 width=123) + predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_98_customer_address_ca_address_sk_min) AND DynamicValue(RS_98_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, 
DynamicValue(RS_98_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_85] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_380] + Group By Operator [GBY_379] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_339] Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_333] (rows=18262 width=1119) + Select Operator [SEL_333] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_327] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_386] - Group By Operator [GBY_385] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_382] + Group By Operator [GBY_381] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_355] Group By Operator [GBY_352] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_349] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_343] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_365] + Reduce Output Operator [RS_363] PartitionCols:_col0 - Group By Operator [GBY_364] (rows=670816147 width=108) + Group By Operator [GBY_362] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_363] (rows=383325119 width=88) + Group By Operator [GBY_361] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=766650239 width=88) + Group By Operator [GBY_33] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_302] (rows=41476 width=3) Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_293] - <-Reducer 19 [SIMPLE_EDGE] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Select Operator [SEL_25] (rows=696954748 width=88) + Select Operator [SEL_25] (rows=2876890 width=4) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_295] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_295] (rows=2876890 width=4) Conds:RS_22._col2=RS_344._col0(Inner),Output:["_col1","_col3"] - <-Map 28 [SIMPLE_EDGE] vectorized + <-Map 25 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_344] PartitionCols:_col0 Please refer to the 
previous Select Operator [SEL_343] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_294] (rows=633595212 width=88) - Conds:RS_362._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 20 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_294] (rows=14384447 width=4) + Conds:RS_360._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_328] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_327] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_362] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_360] PartitionCols:_col0 - Select Operator [SEL_361] (rows=575995635 width=88) + Select Operator [SEL_359] (rows=525327191 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_360] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_23_customer_address_ca_address_sk_min) AND DynamicValue(RS_23_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_23_customer_address_ca_address_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_item_i_item_sk_min) AND DynamicValue(RS_29_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_358] (rows=525327191 width=118) + predicate:((ss_addr_sk BETWEEN 
DynamicValue(RS_23_customer_address_ca_address_sk_min) AND DynamicValue(RS_23_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_23_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_10] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized BROADCAST [RS_341] Group By Operator [GBY_340] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_337] Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_329] (rows=18262 width=1119) + Select Operator [SEL_329] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_327] - <-Reducer 29 [BROADCAST_EDGE] vectorized + <-Reducer 26 [BROADCAST_EDGE] vectorized BROADCAST [RS_357] Group By Operator [GBY_356] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_353] Group By Operator [GBY_350] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_345] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_345] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_343] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_359] - Group By Operator [GBY_358] (rows=1 width=12) + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_378] + PartitionCols:_col0 + Group By Operator [GBY_377] (rows=59 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Group By Operator [GBY_376] (rows=35 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=35 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 + Merge Join Operator [MERGEJOIN_303] (rows=22352 width=3) + Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_293] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col4 + Select Operator [SEL_62] (rows=1550375 width=13) + Output:["_col4","_col5"] + Merge Join Operator [MERGEJOIN_298] (rows=1550375 width=13) + Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_343] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_297] (rows=7751872 width=98) + Conds:RS_375._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE 
[RS_330] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_327] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_375] + PartitionCols:_col0 + Select Operator [SEL_374] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_373] (rows=285117733 width=123) + predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_60_customer_address_ca_address_sk_min) AND DynamicValue(RS_60_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_60_customer_address_ca_address_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_47] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_370] + Group By Operator [GBY_369] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_200] - Group By Operator [GBY_199] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Group By Operator [GBY_335] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=508200 width=1436) + Select Operator [SEL_331] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_327] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_354] + Group By Operator [GBY_351] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_347] (rows=8000000 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_293] + Please refer to the previous Select Operator [SEL_343] diff --git a/ql/src/test/results/clientpositive/perf/tez/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/query34.q.out index dbd09a408f..fa40be9bb9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query34.q.out @@ -73,145 +73,133 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 5 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) 
+Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 vectorized - File Output Operator [FS_141] - Select Operator [SEL_140] (rows=88000001 width=860) + File Output Operator [FS_136] + Select Operator [SEL_135] (rows=276068 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=88000001 width=860) + Select Operator [SEL_34] (rows=276068 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=88000001 width=860) - Conds:RS_103._col0=RS_139._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=276068 width=364) + Conds:RS_103._col0=RS_134._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=80000000 width=860) + Select Operator [SEL_102] (rows=80000000 width=356) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_101] (rows=80000000 width=860) + Filter Operator [FIL_101] (rows=80000000 width=356) predicate:c_customer_sk is not null - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + TableScan [TS_0] (rows=80000000 width=356) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_134] PartitionCols:_col1 - Filter Operator [FIL_138] (rows=19166256 width=88) + Filter Operator [FIL_133] (rows=276068 width=12) predicate:_col2 BETWEEN 15 AND 20 - Select Operator [SEL_137] 
(rows=383325119 width=88) + Select Operator [SEL_132] (rows=5521356 width=12) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_136] (rows=383325119 width=88) + Group By Operator [GBY_131] (rows=5521356 width=12) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_25] (rows=766650239 width=88) + Group By Operator [GBY_25] (rows=5521356 width=12) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88) - Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col1","_col4"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + Merge Join Operator [MERGEJOIN_99] (rows=5521356 width=4) + Conds:RS_21._col3=RS_122._col0(Inner),Output:["_col1","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_124] (rows=1704 width=1910) + Select Operator [SEL_121] (rows=112 width=102) Output:["_col0"] - Filter Operator [FIL_123] (rows=1704 width=1910) + Filter Operator [FIL_120] (rows=112 width=102) predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) - TableScan [TS_12] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"] - <-Reducer 7 [SIMPLE_EDGE] + TableScan [TS_12] (rows=1704 width=102) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88) - Conds:RS_18._col2=RS_117._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_98] (rows=10407948 width=4) + 
Conds:RS_18._col2=RS_114._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] PartitionCols:_col0 - Select Operator [SEL_116] (rows=1200 width=107) + Select Operator [SEL_113] (rows=480 width=104) Output:["_col0"] - Filter Operator [FIL_115] (rows=1200 width=107) + Filter Operator [FIL_112] (rows=480 width=104) predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] - <-Reducer 6 [SIMPLE_EDGE] + TableScan [TS_9] (rows=7200 width=104) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88) - Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + Merge Join Operator [MERGEJOIN_97] (rows=156119211 width=14) + Conds:RS_130._col0=RS_106._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_108] (rows=73049 width=1119) + Select Operator [SEL_105] (rows=595 width=12) Output:["_col0"] - Filter Operator [FIL_107] (rows=73049 width=1119) + Filter Operator [FIL_104] (rows=595 width=12) predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] - <-Map 5 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_134] (rows=575995635 width=88) + Select Operator [SEL_129] (rows=479121995 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_133] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_31_customer_c_customer_sk_min) AND DynamicValue(RS_31_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_31_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) + Filter Operator [FIL_128] (rows=479121995 width=19) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and 
in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=19) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=73049 width=1119) + Select Operator [SEL_107] (rows=595 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_108] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_105] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_119] + Group By Operator [GBY_118] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_118] (rows=1200 width=107) + Select Operator [SEL_115] (rows=480 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_116] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_113] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_126] (rows=1704 width=1910) + Select Operator [SEL_123] (rows=112 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_124] - <-Reducer 4 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator 
[SEL_104] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Please refer to the previous Select Operator [SEL_121] diff --git a/ql/src/test/results/clientpositive/perf/tez/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/query35.q.out index cfb7d4e06c..44993831f4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -129,213 +129,237 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Reducer 13 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 20 <- Reducer 16 (BROADCAST_EDGE) -Map 21 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 12 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 23 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) +Map 24 <- Reducer 22 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 15 (SIMPLE_EDGE), Map 24 
(SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 21 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_226] - Limit [LIM_225] (rows=100 width=88) + Reducer 8 vectorized + File Output Operator [FS_236] + Limit [LIM_235] (rows=1 width=352) Number of rows:100 - Select Operator [SEL_224] (rows=1045432122 width=88) + Select Operator [SEL_234] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Select Operator [SEL_222] (rows=1045432122 width=88) + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] + Select Operator [SEL_232] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_221] (rows=1045432122 width=88) + Group By Operator [GBY_231] (rows=1 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_63] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col0, _col1, 
_col2, _col3, _col4, _col5 - Group By Operator [GBY_62] (rows=2090864244 width=88) + Group By Operator [GBY_66] (rows=1 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_104] (rows=2090864244 width=88) + Top N Key Operator [TNK_104] (rows=67 width=276) keys:_col4, _col6, _col7, _col8, _col9, _col10,sort order:++++++,top n:100 - Select Operator [SEL_61] (rows=2090864244 width=88) + Select Operator [SEL_65] (rows=67 width=276) Output:["_col4","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_60] (rows=2090864244 width=88) + Filter Operator [FIL_64] (rows=67 width=276) predicate:(_col12 is not null or _col14 is not null) - Merge Join Operator [MERGEJOIN_174] (rows=2090864244 width=88) - Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_212._col0(Left Outer),RS_55._col0=RS_220._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"] - <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_182] (rows=67 width=276) + Conds:RS_61._col0=RS_230._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_170] (rows=96800003 width=860) - Conds:RS_50._col1=RS_183._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_183] + Merge Join Operator [MERGEJOIN_181] (rows=68 width=276) + Conds:RS_58._col0=RS_220._col0(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD 
[RS_58] PartitionCols:_col0 - Select Operator [SEL_182] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_181] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_169] (rows=88000001 width=860) - Conds:RS_177._col2=RS_180._col0(Inner),Output:["_col0","_col1","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] - PartitionCols:_col2 - Select Operator [SEL_176] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_175] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] + Merge Join Operator [MERGEJOIN_180] (rows=162346 width=272) + Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_179] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_178] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Group By Operator [GBY_54] (rows=633595212 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=633595212 width=88) - Output:["_col0"] - Merge Join 
Operator [MERGEJOIN_171] (rows=633595212 width=88) - Conds:RS_204._col0=RS_186._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_186] - PartitionCols:_col0 - Select Operator [SEL_185] (rows=12174 width=1119) - Output:["_col0"] - Filter Operator [FIL_184] (rows=12174 width=1119) - predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] + Group By Operator [GBY_54] (rows=168231 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=62428523 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_177] (rows=62428523 width=2) + Conds:RS_210._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=217 width=12) + Output:["_col0"] + Filter Operator [FIL_192] (rows=217 width=12) + predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_210] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_208] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_207] + Group By Operator 
[GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_203] + Group By Operator [GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col0 - Select Operator [SEL_203] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_202] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_199] - Group By Operator [GBY_198] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] - Group By Operator [GBY_192] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_187] (rows=12174 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_185] - 
<-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_201] - Group By Operator [GBY_200] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] - Select Operator [SEL_135] (rows=96800003 width=860) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_170] - <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_210] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_172] (rows=158402938 width=135) - Conds:RS_209._col0=RS_188._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_176] (rows=78293105 width=272) + Conds:RS_50._col1=RS_191._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] + SHUFFLE [RS_191] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_185] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not 
null) - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_196] - Group By Operator [GBY_193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_189] (rows=12174 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_185] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_220] + Select Operator [SEL_190] (rows=1861800 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_189] (rows=1861800 width=186) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=186) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_175] (rows=77201384 width=93) + Conds:RS_185._col2=RS_188._col0(Inner),Output:["_col0","_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col2 + Select Operator [SEL_184] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_183] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 11 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_188] + PartitionCols:_col0 + Select Operator [SEL_187] (rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_186] (rows=40000000 width=90) + predicate:ca_address_sk is not null + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_220] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=168231 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_218] (rows=168231 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=168231 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_178] (rows=17104380 width=3) + Conds:RS_217._col0=RS_196._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_196] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_193] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_215] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_58_c_c_customer_sk_min) AND DynamicValue(RS_58_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_58_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_150] (rows=162346 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_180] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_204] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_197] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 21 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_230] PartitionCols:_col0 - Select Operator [SEL_219] (rows=158394413 width=135) + Select Operator [SEL_229] (rows=167041 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_218] (rows=158394413 width=135) + Group By Operator [GBY_228] (rows=167041 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col0 - Group By Operator [GBY_43] (rows=316788826 width=135) + Group By Operator [GBY_43] (rows=167041 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_173] (rows=316788826 width=135) - Conds:RS_217._col0=RS_190._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_190] + Merge Join Operator [MERGEJOIN_179] (rows=33642830 width=3) + 
Conds:RS_227._col0=RS_198._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_185] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + Please refer to the previous Select Operator [SEL_193] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] PartitionCols:_col0 - Select Operator [SEL_216] (rows=287989836 width=135) + Select Operator [SEL_226] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_215] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_33] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) + Filter Operator [FIL_225] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_61_c_c_customer_sk_min) AND DynamicValue(RS_61_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_61_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_33] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_197] - Group By Operator [GBY_194] (rows=1 width=12) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_167] + Group By Operator [GBY_166] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_191] (rows=12174 width=1119) + Select Operator [SEL_165] (rows=68 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_185] + Please refer to the previous Merge Join Operator [MERGEJOIN_181] diff --git a/ql/src/test/results/clientpositive/perf/tez/query36.q.out b/ql/src/test/results/clientpositive/perf/tez/query36.q.out index 8185303105..4f4ce8a518 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query36.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query36.q.out @@ -69,9 +69,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -85,81 +84,81 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_119] - Limit [LIM_118] (rows=100 width=88) + File Output Operator [FS_114] + Limit [LIM_113] (rows=100 width=490) Number of rows:100 - Select Operator [SEL_117] (rows=1149975358 width=88) + Select Operator [SEL_112] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] - Select Operator [SEL_115] (rows=1149975358 width=88) + SHUFFLE [RS_111] + Select Operator [SEL_110] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_114] (rows=1149975358 width=88) + PTF Operator [PTF_109] (rows=3060 width=414) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col2 / _col3) ASC NULLS FIRST","partition by:":"(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_113] (rows=1149975358 width=88) + Select Operator [SEL_108] (rows=3060 width=414) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_107] PartitionCols:(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_111] (rows=1149975358 width=88) + Select Operator [SEL_106] (rows=3060 width=414) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_110] (rows=1149975358 width=88) + 
Group By Operator [GBY_105] (rows=3060 width=414) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_23] (rows=2299950717 width=88) + Group By Operator [GBY_23] (rows=85680 width=414) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1, 0L - Select Operator [SEL_21] (rows=766650239 width=88) + Select Operator [SEL_21] (rows=30601888 width=232) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_82] (rows=766650239 width=88) - Conds:RS_18._col1=RS_101._col0(Inner),Output:["_col3","_col4","_col10","_col11"] + Merge Join Operator [MERGEJOIN_82] (rows=30601888 width=232) + Conds:RS_18._col1=RS_104._col0(Inner),Output:["_col3","_col4","_col10","_col11"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_101] + SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_100] (rows=462000 width=1436) + Select Operator [SEL_103] (rows=462000 width=186) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_99] (rows=462000 width=1436) + Filter Operator [FIL_102] (rows=462000 width=186) predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + TableScan [TS_9] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_81] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_81] (rows=30601888 width=54) Conds:RS_15._col2=RS_93._col0(Inner),Output:["_col1","_col3","_col4"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_93] PartitionCols:_col0 - Select Operator [SEL_92] (rows=1704 width=1910) + Select Operator [SEL_92] (rows=278 width=90) Output:["_col0"] - 
Filter Operator [FIL_91] (rows=1704 width=1910) + Filter Operator [FIL_91] (rows=278 width=90) predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + TableScan [TS_6] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_80] (rows=633595212 width=88) - Conds:RS_109._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_80] (rows=187574154 width=203) + Conds:RS_101._col0=RS_85._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_85] PartitionCols:_col0 - Select Operator [SEL_84] (rows=36524 width=1119) + Select Operator [SEL_84] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_83] (rows=36524 width=1119) + Filter Operator [FIL_83] (rows=652 width=8) predicate:((d_year = 1999) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_108] (rows=575995635 width=88) + Select Operator [SEL_100] (rows=525329897 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_107] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_d1_d_date_sk_min) AND DynamicValue(RS_13_d1_d_date_sk_max) and 
in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_16_store_s_store_sk_min) AND DynamicValue(RS_16_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_16_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + Filter Operator [FIL_99] (rows=525329897 width=225) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_d1_d_date_sk_min) AND DynamicValue(RS_13_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_16_store_s_store_sk_min) AND DynamicValue(RS_16_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_16_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=225) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_98] Group By Operator [GBY_97] (rows=1 width=12) @@ -168,20 +167,9 @@ Stage-0 SHUFFLE [RS_96] Group By Operator [GBY_95] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_94] (rows=1704 width=1910) + Select Operator [SEL_94] (rows=278 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_92] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_102] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_100] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_90] Group By Operator [GBY_89] (rows=1 width=12) @@ -190,7 +178,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_88] Group By Operator [GBY_87] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_86] (rows=36524 width=1119) + Select Operator [SEL_86] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_84] diff --git a/ql/src/test/results/clientpositive/perf/tez/query37.q.out b/ql/src/test/results/clientpositive/perf/tez/query37.q.out index 8799c9fb28..4407a48931 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query37.q.out @@ -43,98 +43,104 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 vectorized - File Output Operator [FS_97] - Limit [LIM_96] (rows=100 width=135) + Reducer 5 vectorized + File Output Operator [FS_103] + Limit [LIM_102] (rows=1 width=396) Number of rows:100 - Select Operator [SEL_95] (rows=316788826 width=135) + Select Operator [SEL_101] (rows=1 width=396) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_94] - Group By Operator [GBY_93] (rows=316788826 width=135) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] + Group By Operator [GBY_99] (rows=1 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_22] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_21] (rows=633577652 width=135) + Group By Operator [GBY_23] (rows=2 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_43] (rows=633577652 width=135) + Top N Key Operator [TNK_43] (rows=2871 width=396) keys:_col2, _col3, _col4,sort order:+++,top n:100 - Merge Join Operator [MERGEJOIN_73] (rows=633577652 width=135) - 
Conds:RS_92._col0=RS_76._col0(Inner),RS_76._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] - <-Map 5 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_76] - PartitionCols:_col0 - Select Operator [SEL_75] (rows=51333 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_74] (rows=51333 width=1436) - predicate:((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] - <-Reducer 8 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] + Merge Join Operator [MERGEJOIN_79] (rows=2871 width=396) + Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_72] (rows=4593600 width=15) - Conds:RS_84._col0=RS_87._col0(Inner),Output:["_col1"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] + Merge Join Operator [MERGEJOIN_78] (rows=463969 width=4) + Conds:RS_90._col0=RS_93._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] PartitionCols:_col0 - Select Operator [SEL_86] (rows=8116 width=1119) + Select Operator [SEL_92] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_85] (rows=8116 width=1119) + Filter Operator [FIL_91] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + TableScan [TS_9] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_90] PartitionCols:_col0 - Select Operator [SEL_83] 
(rows=4176000 width=15) + Select Operator [SEL_89] (rows=4176000 width=11) Output:["_col0","_col1"] - Filter Operator [FIL_82] (rows=4176000 width=15) + Filter Operator [FIL_88] (rows=4176000 width=11) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) - TableScan [TS_6] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] - PartitionCols:_col0 - Select Operator [SEL_91] (rows=287989836 width=135) - Output:["_col0"] - Filter Operator [FIL_90] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and cs_item_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_81] - Group By Operator [GBY_80] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_79] - Group By Operator [GBY_78] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_77] (rows=51333 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_75] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_89] - Group By Operator [GBY_88] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_51] - Group By Operator [GBY_50] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] - Select Operator [SEL_49] (rows=4593600 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_72] + TableScan [TS_6] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Reducer 2 [ONE_TO_ONE_EDGE] + FORWARD [RS_19] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_77] (rows=1781971 width=400) + Conds:RS_98._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_81] (rows=297 width=404) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_80] (rows=297 width=404) + predicate:((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=403) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_98] + PartitionCols:_col0 + Select Operator [SEL_97] (rows=287989836 width=4) + Output:["_col0"] + Filter Operator [FIL_96] (rows=287989836 width=4) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_20_inventory_inv_item_sk_min) AND DynamicValue(RS_20_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, 
DynamicValue(RS_20_inventory_inv_item_sk_bloom_filter))) and cs_item_sk is not null) + TableScan [TS_0] (rows=287989836 width=4) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_49] (rows=463969 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_78] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_87] + Group By Operator [GBY_86] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_85] + Group By Operator [GBY_84] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_83] (rows=297 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_81] diff --git a/ql/src/test/results/clientpositive/perf/tez/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/query38.q.out index 1f4b4facb6..e91141d836 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query38.q.out @@ -57,20 +57,18 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 19 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 21 <- Reducer 13 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) -Map 22 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 13 (BROADCAST_EDGE) +Map 20 <- Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 22 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) @@ -82,197 +80,175 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_244] - Limit [LIM_243] (rows=1 width=16) + File Output Operator [FS_236] + Limit [LIM_235] (rows=1 width=8) Number of rows:100 - Group By Operator [GBY_242] (rows=1 width=16) + Group By Operator [GBY_234] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_241] - Group By Operator [GBY_240] (rows=1 width=16) + PARTITION_ONLY_SHUFFLE [RS_233] + Group By Operator [GBY_232] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_239] (rows=1 width=108) - Filter Operator [FIL_238] (rows=1 width=108) + Select Operator [SEL_231] (rows=1 width=8) + Filter Operator [FIL_230] (rows=1 width=8) predicate:(_col3 = 3L) - Select 
Operator [SEL_237] (rows=152458212 width=108) + Select Operator [SEL_229] (rows=165330890 width=8) Output:["_col3"] - Group By Operator [GBY_236] (rows=152458212 width=108) + Group By Operator [GBY_228] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 5 [SIMPLE_EDGE] <-Reducer 12 [CONTAINS] vectorized - Reduce Output Operator [RS_256] + Reduce Output Operator [RS_246] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_255] (rows=304916424 width=108) + Group By Operator [GBY_245] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_254] (rows=87116929 width=135) + Group By Operator [GBY_244] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_253] (rows=174233858 width=135) + Select Operator [SEL_243] (rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_252] (rows=174233858 width=135) + Group By Operator [GBY_242] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_41] (rows=348467716 width=135) + Group By Operator [GBY_41] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_178] (rows=348467716 width=135) - Conds:RS_37._col1=RS_219._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_178] (rows=49146883 width=274) + Conds:RS_37._col1=RS_221._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_216] (rows=80000000 width=860) + Select Operator [SEL_219] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - 
Filter Operator [FIL_215] (rows=80000000 width=860) + Filter Operator [FIL_218] (rows=80000000 width=184) predicate:c_customer_sk is not null - TableScan [TS_6] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name"] + TableScan [TS_6] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=316788826 width=135) - Conds:RS_251._col0=RS_203._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_177] (rows=49146883 width=97) + Conds:RS_241._col0=RS_203._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_203] PartitionCols:_col0 - Select Operator [SEL_200] (rows=73049 width=1119) + Select Operator [SEL_200] (rows=317 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_199] (rows=73049 width=1119) + Filter Operator [FIL_199] (rows=317 width=102) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_250] (rows=287989836 width=135) + Select Operator [SEL_240] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_38_customer_c_customer_sk_min) AND DynamicValue(RS_38_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_38_customer_c_customer_sk_bloom_filter))) and 
(cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_25] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + Filter Operator [FIL_239] (rows=285117831 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_25] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_211] Group By Operator [GBY_208] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_204] (rows=73049 width=1119) + Select Operator [SEL_204] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_200] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_248] - Group By Operator [GBY_247] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator 
[GBY_223] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_220] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_266] + Reduce Output Operator [RS_256] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_265] (rows=304916424 width=108) + Group By Operator [GBY_255] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_264] (rows=43560808 width=135) + Group By Operator [GBY_254] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_263] (rows=87121617 width=135) + Select Operator [SEL_253] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_262] (rows=87121617 width=135) + Group By Operator [GBY_252] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_67] (rows=174243235 width=135) + Group By Operator [GBY_67] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_180] (rows=174243235 width=135) - Conds:RS_63._col1=RS_221._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_180] (rows=24986582 width=274) + Conds:RS_63._col1=RS_222._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_222] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_219] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=158402938 
width=135) - Conds:RS_261._col0=RS_205._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_179] (rows=24986582 width=97) + Conds:RS_251._col0=RS_205._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_205] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_200] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_261] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_260] (rows=144002668 width=135) + Select Operator [SEL_250] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_259] (rows=144002668 width=135) + Filter Operator [FIL_249] (rows=143930993 width=7) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_51] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + TableScan [TS_51] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_258] - Group By Operator [GBY_257] (rows=1 width=12) + BROADCAST [RS_248] + Group By Operator [GBY_247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_212] Group By Operator [GBY_209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_206] (rows=73049 width=1119) + Select Operator [SEL_206] (rows=317 width=4) Output:["_col0"] Please refer to the 
previous Select Operator [SEL_200] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_235] + Reduce Output Operator [RS_227] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_234] (rows=304916424 width=108) + Group By Operator [GBY_226] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_233] (rows=174238687 width=88) + Group By Operator [GBY_225] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_232] (rows=348477374 width=88) + Select Operator [SEL_224] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_231] (rows=348477374 width=88) + Group By Operator [GBY_223] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_176] (rows=696954748 width=88) - Conds:RS_12._col1=RS_217._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_176] (rows=91197425 width=274) + Conds:RS_12._col1=RS_220._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_220] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_219] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=633595212 width=88) - Conds:RS_230._col0=RS_201._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_175] (rows=91197425 width=96) + Conds:RS_217._col0=RS_201._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized 
PARTITION_ONLY_SHUFFLE [RS_201] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_200] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_229] (rows=575995635 width=88) + Select Operator [SEL_216] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_228] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_13_customer_c_customer_sk_min) AND DynamicValue(RS_13_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_13_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - Group By Operator [GBY_222] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_218] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Filter Operator [FIL_215] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan 
[TS_0] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_214] Group By Operator [GBY_213] (rows=1 width=12) @@ -281,7 +257,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_210] Group By Operator [GBY_207] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_202] (rows=73049 width=1119) + Select Operator [SEL_202] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_200] diff --git a/ql/src/test/results/clientpositive/perf/tez/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/query39.q.out index 6830287b5c..103e921e70 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query39.q.out @@ -80,59 +80,59 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_232] - Select Operator [SEL_231] (rows=13756683 width=15) + Select Operator [SEL_231] (rows=189509 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_60] - Merge Join Operator [MERGEJOIN_202] (rows=13756683 width=15) + Merge Join Operator [MERGEJOIN_202] (rows=189509 width=48) Conds:RS_225._col0, _col1=RS_230._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_230] PartitionCols:_col0, _col1 - Select Operator [SEL_229] (rows=12506076 width=15) + Select Operator [SEL_229] (rows=18049 width=24) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_228] (rows=12506076 width=15) + Filter Operator [FIL_228] (rows=18049 width=40) predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE 
((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END - Select Operator [SEL_227] (rows=25012152 width=15) + Select Operator [SEL_227] (rows=36099 width=40) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_226] (rows=25012152 width=15) + Group By Operator [GBY_226] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_51] (rows=50024305 width=15) + Group By Operator [GBY_51] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 - Select Operator [SEL_49] (rows=50024305 width=15) + Select Operator [SEL_49] (rows=1032514 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_201] (rows=50024305 width=15) + Merge Join Operator [MERGEJOIN_201] (rows=1032514 width=108) Conds:RS_46._col2=RS_220._col0(Inner),Output:["_col3","_col7","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_218] (rows=27 width=1029) + Select Operator [SEL_218] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_217] (rows=27 width=1029) + Filter Operator [FIL_217] (rows=27 width=104) predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + TableScan [TS_9] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_200] (rows=45476640 width=15) + Merge Join Operator 
[MERGEJOIN_200] (rows=1032514 width=8) Conds:RS_43._col1=RS_216._col0(Inner),Output:["_col2","_col3","_col7"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_216] PartitionCols:_col0 - Select Operator [SEL_214] (rows=462000 width=1436) + Select Operator [SEL_214] (rows=462000 width=4) Output:["_col0"] - Filter Operator [FIL_213] (rows=462000 width=1436) + Filter Operator [FIL_213] (rows=462000 width=4) predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk"] + TableScan [TS_6] (rows=462000 width=4) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_199] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_199] (rows=1032514 width=8) Conds:RS_206._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_206] @@ -142,35 +142,35 @@ Stage-0 Filter Operator [FIL_203] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_210] (rows=18262 width=1119) + Select Operator [SEL_210] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_208] (rows=18262 width=1119) + Filter Operator [FIL_208] (rows=50 width=12) predicate:((d_moy = 5) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_225] PartitionCols:_col0, _col1 - Select Operator [SEL_224] (rows=12506076 width=15) + Select Operator [SEL_224] (rows=18049 width=24) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_223] (rows=12506076 width=15) + Filter Operator [FIL_223] (rows=18049 width=40) predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END - Select Operator [SEL_222] (rows=25012152 width=15) + Select Operator [SEL_222] (rows=36099 width=40) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_221] (rows=25012152 width=15) + Group By Operator [GBY_221] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_23] (rows=50024305 width=15) + Group By Operator [GBY_23] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 - Select Operator [SEL_21] (rows=50024305 width=15) + Select Operator [SEL_21] (rows=1032514 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_198] (rows=50024305 width=15) + Merge Join Operator [MERGEJOIN_198] (rows=1032514 width=108) Conds:RS_18._col2=RS_219._col0(Inner),Output:["_col3","_col7","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_219] @@ -179,7 +179,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator 
[MERGEJOIN_197] (rows=45476640 width=15) + Merge Join Operator [MERGEJOIN_197] (rows=1032514 width=8) Conds:RS_15._col1=RS_215._col0(Inner),Output:["_col2","_col3","_col7"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_215] @@ -188,7 +188,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_196] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_196] (rows=1032514 width=8) Conds:RS_205._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_205] @@ -197,9 +197,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_211] PartitionCols:_col0 - Select Operator [SEL_209] (rows=18262 width=1119) + Select Operator [SEL_209] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_207] (rows=18262 width=1119) + Filter Operator [FIL_207] (rows=50 width=12) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 02878ffc39..27ce7b5b89 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -229,431 +229,409 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 30 (BROADCAST_EDGE) -Map 11 <- Reducer 32 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE) -Map 15 <- Reducer 33 (BROADCAST_EDGE) -Map 19 <- Reducer 29 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE) -Map 23 <- Reducer 28 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) -Map 7 <- Reducer 31 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE) +Map 1 <- Reducer 34 (BROADCAST_EDGE) +Map 11 <- Reducer 35 (BROADCAST_EDGE) +Map 15 <- Reducer 36 (BROADCAST_EDGE) +Map 19 <- Reducer 37 (BROADCAST_EDGE) +Map 23 <- Reducer 33 (BROADCAST_EDGE) +Map 27 <- Reducer 32 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 13 <- Map 34 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 13 <- Map 38 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 17 <- Map 34 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 21 <- Map 34 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 21 <- Map 38 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 25 <- Map 34 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 25 <- Map 38 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 34 
(SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 27 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 29 <- Map 38 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 38 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 31 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 27 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 34 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 26 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Reducer 9 <- Reducer 30 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_587] - Limit [LIM_586] (rows=100 width=88) + Reducer 10 vectorized + File Output Operator [FS_575] + Limit [LIM_574] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_585] (rows=479156399 width=88) + Select Operator [SEL_573] (rows=7323197 width=85) Output:["_col0"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_139] - Select Operator 
[SEL_138] (rows=479156399 width=88) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_147] + Select Operator [SEL_146] (rows=7323197 width=85) Output:["_col0"] - Filter Operator [FIL_136] (rows=479156399 width=88) - predicate:(CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END and CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col1 / _col7))) ELSE ((null > (_col1 / _col7))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END) - Merge Join Operator [MERGEJOIN_470] (rows=1916625598 width=88) - Conds:RS_530._col0=RS_542._col0(Inner),RS_542._col0=RS_554._col0(Inner),RS_542._col0=RS_564._col0(Inner),RS_542._col0=RS_574._col0(Inner),RS_542._col0=RS_584._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col9","_col11","_col12"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_542] + Filter Operator [FIL_145] (rows=7323197 width=533) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_478] (rows=14646395 width=533) + Conds:RS_142._col2=RS_572._col0(Inner),Output:["_col3","_col5","_col9","_col11","_col12"] + <-Reducer 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_572] PartitionCols:_col0 - Select Operator [SEL_541] (rows=116159124 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_540] (rows=116159124 width=88) - predicate:(_col7 > 0) - Select Operator [SEL_539] (rows=348477374 width=88) - Output:["_col0","_col7"] - Group By Operator [GBY_538] (rows=348477374 width=88) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_38] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_36] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_461] (rows=696954748 width=88) - Conds:RS_33._col1=RS_515._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_515] - PartitionCols:_col0 - Select Operator [SEL_509] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_508] (rows=80000000 width=860) - predicate:(c_customer_id is not null and c_customer_sk is not null) - TableScan [TS_114] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_460] (rows=633595212 width=88) - Conds:RS_537._col0=RS_485._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_485] - PartitionCols:_col0 - Select Operator [SEL_476] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_472] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_111] (rows=73049 width=1119) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_537] - PartitionCols:_col0 - Select Operator [SEL_536] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_535] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_34_customer_c_customer_sk_min) AND DynamicValue(RS_34_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_34_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_21] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_532] - Group By Operator [GBY_531] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_500] - Group By Operator [GBY_494] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_486] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_476] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_534] - Group By Operator [GBY_533] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] - Group By 
Operator [GBY_522] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_516] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_509] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_554] - PartitionCols:_col0 - Select Operator [SEL_553] (rows=58077952 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_552] (rows=58077952 width=135) - predicate:(_col7 > 0) - Select Operator [SEL_551] (rows=174233858 width=135) - Output:["_col0","_col7"] - Group By Operator [GBY_550] (rows=174233858 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_60] (rows=348467716 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_58] (rows=348467716 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_463] (rows=348467716 width=135) - Conds:RS_55._col1=RS_517._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_509] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_462] (rows=316788826 width=135) - Conds:RS_549._col0=RS_487._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_487] - PartitionCols:_col0 - Select Operator [SEL_477] (rows=36524 
width=1119) - Output:["_col0"] - Filter Operator [FIL_473] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_111] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_549] - PartitionCols:_col0 - Select Operator [SEL_548] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_547] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_56_customer_c_customer_sk_min) AND DynamicValue(RS_56_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_56_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_43] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_544] - Group By Operator [GBY_543] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_501] - Group By Operator [GBY_495] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_488] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_477] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_546] - Group By Operator [GBY_545] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_527] - Group By Operator [GBY_523] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_518] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_509] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_564] - PartitionCols:_col0 - Select Operator [SEL_563] (rows=29040539 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_562] (rows=29040539 width=135) - predicate:(_col7 > 0) - Select Operator [SEL_561] (rows=87121617 width=135) - Output:["_col0","_col7"] - Group By Operator [GBY_560] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_83] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_82] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_80] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_465] (rows=174243235 width=135) - Conds:RS_77._col1=RS_519._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_519] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_509] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_77] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_464] (rows=158402938 width=135) 
- Conds:RS_559._col0=RS_489._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_489] - PartitionCols:_col0 - Select Operator [SEL_478] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_474] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_111] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_559] - PartitionCols:_col0 - Select Operator [SEL_558] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_557] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_65] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_556] - Group By Operator [GBY_555] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_502] - Group By Operator [GBY_496] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_490] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_478] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_574] - PartitionCols:_col0 - Select Operator [SEL_573] (rows=174233858 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_572] (rows=174233858 width=135) 
+ Select Operator [SEL_571] (rows=80000000 width=297) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_570] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_126] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_104] (rows=348467716 width=135) + Group By Operator [GBY_125] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_102] (rows=348467716 width=135) + Select Operator [SEL_123] (rows=187573258 width=1043) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_467] (rows=348467716 width=135) - Conds:RS_99._col1=RS_512._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_512] + Merge Join Operator [MERGEJOIN_473] (rows=187573258 width=1043) + Conds:RS_120._col1=RS_518._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_518] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_509] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_99] + Select Operator [SEL_517] (rows=80000000 width=656) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_516] (rows=80000000 width=656) + predicate:(c_customer_id is not null and c_customer_sk is not null) + TableScan [TS_114] (rows=80000000 width=656) + 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_120] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_466] (rows=316788826 width=135) - Conds:RS_571._col0=RS_481._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_481] + Merge Join Operator [MERGEJOIN_472] (rows=187573258 width=395) + Conds:RS_569._col0=RS_487._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_487] PartitionCols:_col0 - Select Operator [SEL_475] (rows=36524 width=1119) + Select Operator [SEL_483] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_471] (rows=36524 width=1119) + Filter Operator [FIL_479] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_111] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_571] + TableScan [TS_111] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_569] PartitionCols:_col0 - Select Operator [SEL_570] (rows=287989836 width=135) + Select Operator [SEL_568] (rows=525327388 width=435) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_569] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_100_customer_c_customer_sk_min) AND DynamicValue(RS_100_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_100_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null 
and cs_sold_date_sk is not null) - TableScan [TS_87] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 29 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_567] (rows=525327388 width=435) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_118_date_dim_d_date_sk_min) AND DynamicValue(RS_118_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_118_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_108] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 32 [BROADCAST_EDGE] vectorized BROADCAST [RS_566] Group By Operator [GBY_565] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] - Group By Operator [GBY_492] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_482] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_568] - Group By Operator [GBY_567] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_525] - Group By Operator [GBY_521] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - 
Select Operator [SEL_513] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_509] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_584] - PartitionCols:_col0 - Select Operator [SEL_583] (rows=348477374 width=88) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_582] (rows=348477374 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_126] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_125] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_123] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_469] (rows=696954748 width=88) - Conds:RS_120._col1=RS_510._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_509] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_120] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_468] (rows=633595212 width=88) - Conds:RS_581._col0=RS_479._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_479] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_475] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_581] - PartitionCols:_col0 - Select Operator [SEL_580] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_579] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN 
DynamicValue(RS_121_customer_c_customer_sk_min) AND DynamicValue(RS_121_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_121_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_118_date_dim_d_date_sk_min) AND DynamicValue(RS_118_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_118_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_108] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_576] - Group By Operator [GBY_575] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_497] - Group By Operator [GBY_491] (rows=1 width=12) + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_505] + Group By Operator [GBY_499] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_480] (rows=36524 width=1119) + Select Operator [SEL_488] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_578] - Group By Operator [GBY_577] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_524] - Group By Operator [GBY_520] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator 
[SEL_511] (rows=80000000 width=860) + Please refer to the previous Select Operator [SEL_483] + <-Reducer 8 [ONE_TO_ONE_EDGE] + FORWARD [RS_142] + PartitionCols:_col2 + Filter Operator [FIL_141] (rows=12248093 width=660) + predicate:CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col1 / _col7))) ELSE ((null > (_col1 / _col7))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_477] (rows=24496186 width=660) + Conds:RS_138._col2=RS_564._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col7","_col9"] + <-Reducer 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_564] + PartitionCols:_col0 + Select Operator [SEL_563] (rows=80000000 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_562] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_104] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_102] (rows=101084444 width=1093) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_471] (rows=101084444 width=1093) + Conds:RS_99._col1=RS_519._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_519] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_517] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_470] (rows=101084444 width=445) + 
Conds:RS_561._col0=RS_489._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_489] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_483] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_561] + PartitionCols:_col0 + Select Operator [SEL_560] (rows=285117831 width=453) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_559] (rows=285117831 width=453) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_87] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_558] + Group By Operator [GBY_557] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_506] + Group By Operator [GBY_500] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_490] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_483] + <-Reducer 7 [ONE_TO_ONE_EDGE] + FORWARD [RS_138] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_476] (rows=20485011 width=548) + Conds:RS_135._col2=RS_556._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_556] + PartitionCols:_col0 + Select Operator [SEL_555] (rows=17130654 
width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_554] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_553] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_552] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_82] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_80] (rows=51391963 width=1099) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_469] (rows=51391963 width=1099) + Conds:RS_77._col1=RS_523._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_523] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_517] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_468] (rows=51391963 width=451) + Conds:RS_551._col0=RS_497._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] + PartitionCols:_col0 + Select Operator [SEL_486] (rows=652 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_509] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_530] - PartitionCols:_col0 - Select Operator [SEL_529] (rows=87121617 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_528] (rows=87121617 width=135) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_17] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_15] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_459] (rows=174243235 width=135) - Conds:RS_12._col1=RS_514._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_514] + Filter Operator [FIL_482] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_111] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_551] + PartitionCols:_col0 + Select Operator [SEL_550] (rows=143930993 width=455) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_549] (rows=143930993 width=455) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_65] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_548] + Group By Operator [GBY_547] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_510] + Group By Operator [GBY_504] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_498] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_486] + <-Reducer 6 [ONE_TO_ONE_EDGE] + FORWARD [RS_135] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_475] (rows=31888273 width=436) + Conds:RS_132._col2=RS_546._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_546] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_509] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_458] (rows=158402938 width=135) - Conds:RS_507._col0=RS_483._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_483] + Select Operator [SEL_545] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_544] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_543] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_542] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_60] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_58] (rows=101084444 
width=1093) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_467] (rows=101084444 width=1093) + Conds:RS_55._col1=RS_522._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_522] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_517] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_466] (rows=101084444 width=445) + Conds:RS_541._col0=RS_495._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_495] + PartitionCols:_col0 + Select Operator [SEL_485] (rows=652 width=8) + Output:["_col0"] + Filter Operator [FIL_481] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_111] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_541] + PartitionCols:_col0 + Select Operator [SEL_540] (rows=285117831 width=453) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_539] (rows=285117831 width=453) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_43] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_538] + Group By Operator [GBY_537] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] 
+ <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_509] + Group By Operator [GBY_503] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_496] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_485] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_132] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_474] (rows=31888273 width=324) + Conds:RS_526._col0=RS_536._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_536] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_475] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] + Select Operator [SEL_535] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_534] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_533] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_532] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_38] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator [SEL_36] (rows=187573258 width=1043) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_465] (rows=187573258 width=1043) + Conds:RS_33._col1=RS_521._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_521] + PartitionCols:_col0 + 
Please refer to the previous Select Operator [SEL_517] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_464] (rows=187573258 width=395) + Conds:RS_531._col0=RS_493._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_493] + PartitionCols:_col0 + Select Operator [SEL_484] (rows=652 width=8) + Output:["_col0"] + Filter Operator [FIL_480] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_111] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_531] + PartitionCols:_col0 + Select Operator [SEL_530] (rows=525327388 width=435) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_529] (rows=525327388 width=435) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_21] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_528] + Group By Operator [GBY_527] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_508] + Group By Operator [GBY_502] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_494] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_484] + <-Reducer 4 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_526] PartitionCols:_col0 - Select Operator [SEL_506] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_505] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_504] - Group By Operator [GBY_503] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_499] - Group By Operator [GBY_493] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_484] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] + Select Operator [SEL_525] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_524] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_17] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, 
_col4, _col5, _col6 + Select Operator [SEL_15] (rows=51391963 width=1099) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_463] (rows=51391963 width=1099) + Conds:RS_12._col1=RS_520._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_520] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_517] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_462] (rows=51391963 width=451) + Conds:RS_515._col0=RS_491._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 31 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_491] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_483] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_515] + PartitionCols:_col0 + Select Operator [SEL_514] (rows=143930993 width=455) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_513] (rows=143930993 width=455) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_512] + Group By Operator [GBY_511] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_507] + Group By Operator [GBY_501] 
(rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_492] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_483] diff --git a/ql/src/test/results/clientpositive/perf/tez/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/query40.q.out index c43f317ee0..1cca18d306 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -67,10 +67,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -83,77 +82,77 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_136] - Limit [LIM_135] (rows=100 width=135) + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_134] (rows=210822976 width=135) + Select Operator [SEL_129] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] - Group By Operator [GBY_132] (rows=210822976 width=135) + SHUFFLE [RS_128] + Group By Operator [GBY_127] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1 - Group By Operator [GBY_29] (rows=421645953 width=135) + Group By Operator [GBY_29] (rows=5757278 width=410) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Top N Key Operator [TNK_55] (rows=421645953 width=135) + Top N Key Operator [TNK_55] (rows=5757278 width=364) keys:_col0, _col1,sort order:++,top n:100 - Select Operator [SEL_27] (rows=421645953 width=135) + Select Operator [SEL_27] (rows=5757278 width=364) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) - Conds:RS_24._col1=RS_120._col0(Inner),Output:["_col4","_col7","_col9","_col11","_col14"] + Merge Join Operator [MERGEJOIN_101] (rows=5757278 width=364) + Conds:RS_24._col1=RS_126._col0(Inner),Output:["_col4","_col7","_col9","_col11","_col14"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_119] (rows=27 width=1029) + Select Operator [SEL_125] (rows=27 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=27 width=1029) + Filter Operator [FIL_124] (rows=27 width=90) predicate:w_warehouse_sk is not null - TableScan [TS_12] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_state"] + TableScan [TS_12] (rows=27 width=90) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_100] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_100] (rows=5757278 width=281) Conds:RS_21._col2=RS_112._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_112] PartitionCols:_col0 - Select Operator [SEL_111] (rows=51333 width=1436) + Select Operator [SEL_111] (rows=51333 width=215) Output:["_col0","_col1"] - Filter Operator [FIL_110] (rows=51333 width=1436) + Filter Operator [FIL_110] (rows=51333 width=215) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) - TableScan [TS_9] 
(rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_current_price"] + TableScan [TS_9] (rows=462000 width=215) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_99] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_99] (rows=51815831 width=210) Conds:RS_18._col0=RS_104._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_103] (rows=8116 width=1119) + Select Operator [SEL_103] (rows=8116 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_102] (rows=8116 width=1119) + Filter Operator [FIL_102] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_98] (rows=316788826 width=135) - Conds:RS_128._col2, _col3=RS_131._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + Merge Join Operator [MERGEJOIN_98] (rows=466374405 width=167) + Conds:RS_120._col2, _col3=RS_123._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + SHUFFLE [RS_120] PartitionCols:_col2, _col3 - Select Operator [SEL_127] (rows=287989836 width=135) + Select Operator [SEL_119] (rows=285115816 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_126] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN 
DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_25_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_25_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_25_warehouse_w_warehouse_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] + Filter Operator [FIL_118] (rows=285115816 width=127) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_109] Group By Operator [GBY_108] (rows=1 width=12) @@ -162,7 +161,7 @@ Stage-0 SHUFFLE [RS_107] Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_105] 
(rows=8116 width=1119) + Select Operator [SEL_105] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_103] <-Reducer 12 [BROADCAST_EDGE] vectorized @@ -173,27 +172,16 @@ Stage-0 SHUFFLE [RS_115] Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_113] (rows=51333 width=1436) + Select Operator [SEL_113] (rows=51333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_111] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_125] - Group By Operator [GBY_124] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_121] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_119] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] + SHUFFLE [RS_123] PartitionCols:_col0, _col1 - Select Operator [SEL_130] (rows=28798881 width=106) + Select Operator [SEL_122] (rows=28798881 width=117) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_129] (rows=28798881 width=106) + Filter Operator [FIL_121] (rows=28798881 width=117) predicate:cr_item_sk is not null - TableScan [TS_3] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] + TableScan [TS_3] (rows=28798881 width=117) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query42.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query42.q.out index 9c82580eca..a458f5e095 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query42.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query42.q.out @@ -65,55 +65,55 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_79] - Limit [LIM_78] (rows=100 width=88) + Limit [LIM_78] (rows=100 width=210) Number of rows:100 - Select Operator [SEL_77] (rows=348477374 width=88) + Select Operator [SEL_77] (rows=110 width=210) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_76] - Select Operator [SEL_75] (rows=348477374 width=88) + Select Operator [SEL_75] (rows=110 width=318) Output:["_col0","_col1","_col3"] - Group By Operator [GBY_74] (rows=348477374 width=88) + Group By Operator [GBY_74] (rows=110 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=120 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8 - Merge Join Operator [MERGEJOIN_54] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=94) Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col7","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_64] (rows=231000 width=1436) + Select Operator [SEL_64] (rows=7333 width=101) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_63] (rows=231000 width=1436) + Filter Operator [FIL_63] (rows=7333 width=101) predicate:((i_manager_id = 1) and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_category_id","i_category","i_manager_id"] + TableScan [TS_6] (rows=462000 width=101) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category_id","i_category","i_manager_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_53] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_53] (rows=15062131 width=4) Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_56] (rows=18262 width=1119) + Select Operator [SEL_56] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_55] (rows=18262 width=1119) + Filter Operator [FIL_55] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,dt,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_73] PartitionCols:_col0 - Select Operator [SEL_72] (rows=575995635 width=88) + Select Operator [SEL_72] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_71] (rows=575995635 width=88) + Filter Operator [FIL_71] (rows=550076554 width=114) predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_dt_d_date_sk_min) AND DynamicValue(RS_10_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_dt_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_62] Group By Operator [GBY_61] (rows=1 width=12) @@ -122,7 +122,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_60] Group By Operator [GBY_59] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_58] (rows=18262 width=1119) + Select Operator [SEL_58] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_56] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -133,7 +133,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_68] Group By Operator [GBY_67] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_66] (rows=231000 width=1436) + Select Operator [SEL_66] (rows=7333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_64] diff --git a/ql/src/test/results/clientpositive/perf/tez/query43.q.out b/ql/src/test/results/clientpositive/perf/tez/query43.q.out index f1bce44849..f75929bbb7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query43.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query43.q.out @@ -59,57 +59,57 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_79] - Limit [LIM_78] (rows=100 width=88) + Limit [LIM_78] (rows=100 width=972) Number of rows:100 - Select Operator [SEL_77] (rows=348477374 width=88) + Select Operator [SEL_77] (rows=3751 width=972) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_76] - Group By Operator [GBY_75] (rows=348477374 width=88) + Group By Operator [GBY_75] (rows=3751 width=972) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=696954748 width=88) + Group By Operator [GBY_17] (rows=176297 width=972) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Top N Key Operator [TNK_33] (rows=696954748 width=88) + Top N Key Operator [TNK_33] (rows=37536846 width=320) keys:_col0, _col1,sort order:++,top n:100 - Select Operator [SEL_15] (rows=696954748 width=88) + Select Operator [SEL_15] (rows=37536846 width=320) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_55] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_55] (rows=37536846 width=320) Conds:RS_12._col1=RS_66._col0(Inner),Output:["_col2","_col5","_col7","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_65] (rows=852 width=1910) + Select Operator [SEL_65] (rows=341 width=304) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_64] (rows=852 width=1910) + Filter Operator [FIL_64] (rows=341 width=303) predicate:((s_gmt_offset = -6) and s_store_sk is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] + TableScan [TS_6] (rows=1704 width=303) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_54] (rows=633595212 width=88) + Merge Join 
Operator [MERGEJOIN_54] (rows=187574154 width=192) Conds:RS_74._col0=RS_58._col0(Inner),Output:["_col1","_col2","_col5"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_58] PartitionCols:_col0 - Select Operator [SEL_57] (rows=36524 width=1119) + Select Operator [SEL_57] (rows=652 width=99) Output:["_col0","_col2"] - Filter Operator [FIL_56] (rows=36524 width=1119) + Filter Operator [FIL_56] (rows=652 width=99) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_day_name"] + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_73] (rows=575995635 width=88) + Select Operator [SEL_73] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_72] (rows=575995635 width=88) + Filter Operator [FIL_72] (rows=525329897 width=114) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_13_store_s_store_sk_min) AND DynamicValue(RS_13_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_13_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_63] Group By Operator [GBY_62] (rows=1 width=12) @@ -118,7 +118,7 @@ Stage-0 
PARTITION_ONLY_SHUFFLE [RS_61] Group By Operator [GBY_60] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_59] (rows=36524 width=1119) + Select Operator [SEL_59] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_57] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -129,7 +129,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_67] (rows=852 width=1910) + Select Operator [SEL_67] (rows=341 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out index 1934f4c4bb..0ec015a3eb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -92,86 +92,86 @@ Stage-0 Stage-1 Reducer 4 vectorized File Output Operator [FS_138] - Limit [LIM_137] (rows=100 width=177) + Limit [LIM_137] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_136] (rows=1393898919384048 width=177) + Select Operator [SEL_136] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_69] - Select Operator [SEL_68] (rows=1393898919384048 width=177) + Select Operator [SEL_68] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_107] (rows=1393898919384048 width=177) + Merge Join Operator [MERGEJOIN_107] (rows=6951 width=218) Conds:RS_65._col3=RS_66._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_104] (rows=1267180808338276 width=177) + Merge Join Operator [MERGEJOIN_104] (rows=6951 
width=111) Conds:RS_110._col0=RS_130._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_109] (rows=462000 width=1436) + Select Operator [SEL_109] (rows=462000 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_108] (rows=462000 width=1436) + Filter Operator [FIL_108] (rows=462000 width=111) predicate:i_item_sk is not null - TableScan [TS_0] (rows=462000 width=1436) - default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_product_name"] + TableScan [TS_0] (rows=462000 width=111) + default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_129] (rows=1151982528066248 width=177) + Select Operator [SEL_129] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_128] (rows=1151982528066248 width=177) + Filter Operator [FIL_128] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_127] (rows=3455947584198744 width=177) + PTF Operator [PTF_127] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] - Select Operator [SEL_126] (rows=3455947584198744 width=177) + Select Operator [SEL_126] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:0 - Filter Operator [FIL_21] (rows=3455947584198744 width=177) + Filter Operator [FIL_21] (rows=20854 width=228) predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_103] (rows=10367842752596232 width=177) + Merge Join Operator [MERGEJOIN_103] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_125] - Select Operator [SEL_124] (rows=71999454 width=88) + Select Operator [SEL_124] (rows=1 width=112) Output:["_col0"] - Group By Operator 
[GBY_123] (rows=71999454 width=88) + Group By Operator [GBY_123] (rows=1 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] PartitionCols:_col0 - Group By Operator [GBY_121] (rows=143998908 width=88) + Group By Operator [GBY_121] (rows=258 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_120] (rows=143998908 width=88) + Select Operator [SEL_120] (rows=287946 width=114) Output:["_col1"] - Filter Operator [FIL_119] (rows=143998908 width=88) + Filter Operator [FIL_119] (rows=287946 width=114) predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + TableScan [TS_10] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_118] - Select Operator [SEL_117] (rows=143998908 width=88) + Select Operator [SEL_117] (rows=62562 width=116) Output:["_col0","_col1"] - Group By Operator [GBY_116] (rows=143998908 width=88) + Group By Operator [GBY_116] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_114] (rows=287997817 width=88) + Group By Operator [GBY_114] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_113] (rows=287997817 width=88) + Select Operator [SEL_113] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_112] (rows=287997817 width=88) + Filter Operator [FIL_112] (rows=6399952 
width=114) predicate:(ss_store_sk = 410) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,ss1,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + TableScan [TS_3] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_106] (rows=1267180808338276 width=177) + Merge Join Operator [MERGEJOIN_106] (rows=6951 width=111) Conds:RS_111._col0=RS_135._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] @@ -180,13 +180,13 @@ Stage-0 <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_135] PartitionCols:_col0 - Select Operator [SEL_134] (rows=1151982528066248 width=177) + Select Operator [SEL_134] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_133] (rows=1151982528066248 width=177) + Filter Operator [FIL_133] (rows=6951 width=116) predicate:((rank_window_0 < 11) and _col0 is not null) - PTF Operator [PTF_132] (rows=3455947584198744 width=177) + PTF Operator [PTF_132] (rows=20854 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_131] (rows=3455947584198744 width=177) + Select Operator [SEL_131] (rows=20854 width=116) Output:["_col0","_col1"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_51] diff --git a/ql/src/test/results/clientpositive/perf/tez/query45.q.out b/ql/src/test/results/clientpositive/perf/tez/query45.q.out index 75c8d03bb2..c2f9374c4b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query45.q.out @@ -52,168 +52,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 8 (SIMPLE_EDGE) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 8 (SIMPLE_EDGE) +Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 12 (ONE_TO_ONE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 11 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_173] - Limit [LIM_172] (rows=100 width=152) + File Output Operator [FS_171] + Limit [LIM_170] (rows=100 width=299) Number of rows:100 - Select Operator [SEL_171] (rows=95833781 width=152) + Select Operator [SEL_169] (rows=285780 width=299) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] - Group By Operator [GBY_169] (rows=95833781 width=152) + SHUFFLE [RS_168] + Group By Operator [GBY_167] (rows=285780 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col0, _col1 - Group By Operator [GBY_52] (rows=191667562 width=152) + Group By Operator [GBY_52] (rows=4858260 width=299) 
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 - Top N Key Operator [TNK_82] (rows=191667562 width=152) + Top N Key Operator [TNK_82] (rows=10246864 width=418) keys:_col8, _col7,sort order:++,top n:100 - Select Operator [SEL_51] (rows=191667562 width=152) + Select Operator [SEL_51] (rows=10246864 width=418) Output:["_col3","_col7","_col8"] - Filter Operator [FIL_50] (rows=191667562 width=152) + Filter Operator [FIL_50] (rows=10246864 width=418) predicate:((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0L)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) - Select Operator [SEL_49] (rows=191667562 width=152) + Select Operator [SEL_49] (rows=10246864 width=418) Output:["_col3","_col7","_col8","_col13","_col14","_col15","_col17"] - Merge Join Operator [MERGEJOIN_133] (rows=191667562 width=152) + Merge Join Operator [MERGEJOIN_133] (rows=10246864 width=418) Conds:(Inner),Output:["_col3","_col4","_col6","_col8","_col12","_col16","_col17"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=1 width=16) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_150] Group By Operator [GBY_148] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count()","count(i_item_id)"] - Select Operator [SEL_145] (rows=462000 width=1436) + Select Operator [SEL_145] (rows=11 width=104) Output:["i_item_id"] - Filter Operator [FIL_142] (rows=462000 width=1436) + Filter Operator [FIL_142] (rows=11 width=104) predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) - TableScan [TS_6] (rows=462000 width=1436) - 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_6] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_46] - Merge Join Operator [MERGEJOIN_132] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_132] (rows=10246864 width=402) Conds:RS_43._col0=RS_44._col6(Inner),Output:["_col3","_col4","_col6","_col8","_col12"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_131] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_131] (rows=10246864 width=219) Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col1","_col3","_col6","_col7"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_130] (rows=10246864 width=119) + Conds:RS_164._col0=RS_156._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_156] + PartitionCols:_col0 + Select Operator [SEL_155] (rows=130 width=12) + Output:["_col0"] + Filter Operator [FIL_154] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) + TableScan [TS_19] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Select Operator [SEL_163] (rows=143930993 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_162] (rows=143930993 width=123) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_16] (rows=144002668 width=123) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_161] + Group By Operator [GBY_160] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_157] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_155] <-Reducer 9 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] + SHUFFLE [RS_29] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_129] (rows=508200 width=1436) + Merge Join Operator [MERGEJOIN_129] (rows=462007 width=104) Conds:RS_146._col1=RS_153._col0(Left Outer),Output:["_col0","_col1","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col1 - Select Operator [SEL_143] (rows=462000 width=1436) + Select Operator [SEL_143] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_140] (rows=462000 width=1436) + Filter Operator [FIL_140] (rows=462000 width=104) predicate:i_item_sk is not null Please refer to the previous TableScan [TS_6] - <-Reducer 12 [ONE_TO_ONE_EDGE] vectorized + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_153] PartitionCols:_col0 - Select Operator [SEL_152] (rows=231000 width=1436) + Select Operator [SEL_152] (rows=5 width=104) Output:["_col0","_col1"] - Group By Operator [GBY_151] (rows=231000 width=1436) + Group By Operator [GBY_151] (rows=5 width=100) Output:["_col0"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col0 - Group By Operator [GBY_147] (rows=462000 width=1436) + Group By Operator [GBY_147] (rows=5 width=100) 
Output:["_col0"],keys:i_item_id - Select Operator [SEL_144] (rows=462000 width=1436) + Select Operator [SEL_144] (rows=11 width=104) Output:["i_item_id"] - Filter Operator [FIL_141] (rows=462000 width=1436) + Filter Operator [FIL_141] (rows=11 width=104) predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) Please refer to the previous TableScan [TS_6] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=158402938 width=135) - Conds:RS_166._col0=RS_156._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_156] - PartitionCols:_col0 - Select Operator [SEL_155] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_154] (rows=18262 width=1119) - predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_19] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] - PartitionCols:_col0 - Select Operator [SEL_165] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_164] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_29_item_i_item_sk_min) AND DynamicValue(RS_29_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_29_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_16] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_163] - Group By Operator [GBY_162] 
(rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_115] - Group By Operator [GBY_114] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_113] (rows=508200 width=1436) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_129] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_157] (rows=18262 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_128] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_128] (rows=80000000 width=191) Conds:RS_136._col1=RS_139._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_136] PartitionCols:_col1 - Select Operator [SEL_135] (rows=80000000 width=860) + Select Operator [SEL_135] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=80000000 width=860) + Filter Operator [FIL_134] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + TableScan [TS_0] (rows=80000000 width=8) + 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_138] (rows=40000000 width=1014) + Select Operator [SEL_138] (rows=40000000 width=191) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_137] (rows=40000000 width=1014) + Filter Operator [FIL_137] (rows=40000000 width=191) predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_zip"] + TableScan [TS_3] (rows=40000000 width=191) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/query46.q.out index a73f5c4824..6d394e7317 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query46.q.out @@ -83,184 +83,160 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Map 8 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_192] - Limit [LIM_191] (rows=100 width=88) + File Output Operator [FS_185] + Limit [LIM_184] (rows=100 width=594) Number of rows:100 - Select Operator [SEL_190] (rows=463823414 width=88) + Select Operator [SEL_183] (rows=20351707 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=463823414 width=88) + Select Operator [SEL_45] (rows=20351707 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator 
[FIL_44] (rows=463823414 width=88) + Filter Operator [FIL_44] (rows=20351707 width=594) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_145] (rows=463823414 width=88) - Conds:RS_41._col0=RS_189._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_145] (rows=20351707 width=594) + Conds:RS_41._col0=RS_182._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_41] + SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_140] (rows=80000000 width=277) Conds:RS_148._col1=RS_151._col0(Inner),Output:["_col0","_col2","_col3","_col5"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_151] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_150] (rows=40000000 width=1014) + Select Operator [SEL_150] (rows=40000000 width=97) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=40000000 width=1014) + Filter Operator [FIL_149] (rows=40000000 width=97) predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,current_addr,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_city"] + TableScan [TS_3] (rows=40000000 width=97) + default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_148] PartitionCols:_col1 - Select Operator [SEL_147] (rows=80000000 width=860) + Select Operator [SEL_147] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_146] (rows=80000000 width=860) + Filter Operator [FIL_146] (rows=80000000 width=188) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - 
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] + TableScan [TS_0] (rows=80000000 width=188) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_182] PartitionCols:_col1 - Select Operator [SEL_188] (rows=421657640 width=88) + Select Operator [SEL_181] (rows=20351707 width=321) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_187] (rows=421657640 width=88) + Group By Operator [GBY_180] (rows=20351707 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 7 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=843315281 width=88) + Group By Operator [GBY_34] (rows=20351707 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col17, _col3, _col5 - Merge Join Operator [MERGEJOIN_144] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_144] (rows=20351707 width=97) Conds:RS_30._col3=RS_152._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col17"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_152] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_150] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_143] (rows=766650239 width=88) - Conds:RS_27._col2=RS_174._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_174] + Merge Join Operator [MERGEJOIN_143] 
(rows=20351707 width=4) + Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_173] (rows=7200 width=107) + Select Operator [SEL_170] (rows=1855 width=12) Output:["_col0"] - Filter Operator [FIL_172] (rows=7200 width=107) + Filter Operator [FIL_169] (rows=1855 width=12) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 12 [SIMPLE_EDGE] + TableScan [TS_15] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=696954748 width=88) - Conds:RS_24._col4=RS_166._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_166] + Merge Join Operator [MERGEJOIN_142] (rows=78993142 width=178) + Conds:RS_24._col4=RS_163._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_165] (rows=1704 width=1910) + Select Operator [SEL_162] (rows=85 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=1704 width=1910) + Filter Operator [FIL_161] (rows=85 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) - TableScan [TS_12] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_12] (rows=1704 width=97) + 
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_141] (rows=633595212 width=88) - Conds:RS_186._col0=RS_158._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 14 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] + Merge Join Operator [MERGEJOIN_141] (rows=196204013 width=218) + Conds:RS_179._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] PartitionCols:_col0 - Select Operator [SEL_157] (rows=73049 width=1119) + Select Operator [SEL_154] (rows=783 width=12) Output:["_col0"] - Filter Operator [FIL_156] (rows=73049 width=1119) + Filter Operator [FIL_153] (rows=783 width=12) predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_179] PartitionCols:_col0 - Select Operator [SEL_185] (rows=575995635 width=88) + Select Operator [SEL_178] (rows=457565061 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_184] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_31_customer_address_ca_address_sk_min) AND DynamicValue(RS_31_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_31_customer_address_ca_address_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_41_customer_c_customer_sk_min) AND DynamicValue(RS_41_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, 
DynamicValue(RS_41_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_28_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_28_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_28_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] + Filter Operator [FIL_177] (rows=457565061 width=237) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_28_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_28_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_28_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 
width=237) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_156] (rows=783 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_154] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_163] - Group By Operator [GBY_162] (rows=1 width=12) + BROADCAST [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_159] (rows=73049 width=1119) + Select Operator [SEL_164] (rows=85 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_157] + Please refer to the previous Select Operator [SEL_162] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_171] - Group By Operator [GBY_170] (rows=1 width=12) + BROADCAST [RS_176] + Group By Operator [GBY_175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_169] - Group By Operator [GBY_168] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_167] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_165] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_179] - Group By Operator [GBY_178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_177] - Group By Operator [GBY_176] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_174] + Group By Operator [GBY_173] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_175] (rows=7200 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_173] - <-Reducer 5 [BROADCAST_EDGE] vectorized - BROADCAST [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_117] - Group By Operator [GBY_116] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_115] (rows=88000001 width=860) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_140] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_181] - Group By Operator [GBY_180] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - 
<-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] - Group By Operator [GBY_154] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_153] (rows=40000000 width=1014) + Select Operator [SEL_172] (rows=1855 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_150] + Please refer to the previous Select Operator [SEL_170] diff --git a/ql/src/test/results/clientpositive/perf/tez/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out index f9c21aa376..75ab3172dc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -111,180 +111,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE) Reducer 10 <- Reducer 5 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_334] - Limit [LIM_333] (rows=100 width=88) + File Output Operator 
[FS_321] + Limit [LIM_320] (rows=100 width=658) Number of rows:100 - Select Operator [SEL_332] (rows=843315280 width=88) + Select Operator [SEL_319] (rows=241454 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_108] - Select Operator [SEL_107] (rows=843315280 width=88) + SHUFFLE [RS_110] + Select Operator [SEL_109] (rows=241454 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_279] (rows=843315280 width=88) - Conds:RS_320._col0, _col1, _col2, _col3, (_col5 + 1)=RS_331._col0, _col1, _col2, _col3, _col8(Inner),RS_331._col0, _col1, _col2, _col3, _col8=RS_315._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] - PartitionCols:_col0, _col1, _col2, _col3, _col8 - Select Operator [SEL_330] (rows=31943759 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_329] (rows=31943759 width=88) - predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END - Select Operator [SEL_328] (rows=63887519 width=88) - Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_327] (rows=63887519 width=88) - predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) - PTF Operator [PTF_326] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5, _col6"}] - Select Operator [SEL_325] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] - PartitionCols:_col1, _col0, _col4, _col5 - Select Operator [SEL_323] (rows=383325119 width=88) - 
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_322] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col5, _col2"}] - Select Operator [SEL_321] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - PartitionCols:_col1, _col0, _col4, _col5, _col2 - Group By Operator [GBY_307] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_92] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col5, _col6, _col11, _col12 - Merge Join Operator [MERGEJOIN_278] (rows=766650239 width=88) - Conds:RS_88._col2=RS_298._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] - PartitionCols:_col0 - Select Operator [SEL_297] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_296] (rows=1704 width=1910) - predicate:(s_company_name is not null and s_store_name is not null and s_store_sk is not null) - TableScan [TS_79] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_277] (rows=696954748 width=88) - Conds:RS_85._col1=RS_290._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] - <-Map 14 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_288] (rows=462000 width=1436) - predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) - TableScan [TS_76] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=633595212 width=88) - Conds:RS_306._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] - PartitionCols:_col0 - Select Operator [SEL_281] (rows=73049 width=1119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=73049 width=1119) - predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_305] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_304] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_86_item_i_item_sk_min) AND DynamicValue(RS_86_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_86_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_89_store_s_store_sk_min) AND DynamicValue(RS_89_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_89_store_s_store_sk_bloom_filter))) and 
ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_70] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] - Group By Operator [GBY_292] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_303] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_299] 
(rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_297] + Merge Join Operator [MERGEJOIN_278] (rows=241454 width=546) + Conds:RS_106._col6, _col7, _col8, _col9, _col14=RS_306._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE [RS_306] PartitionCols:_col0, _col1, _col2, _col3, (_col5 - 1) - Select Operator [SEL_314] (rows=383325119 width=88) + Select Operator [SEL_304] (rows=162257387 width=485) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_313] (rows=383325119 width=88) + Filter Operator [FIL_302] (rows=162257387 width=489) predicate:rank_window_0 is not null - PTF Operator [PTF_312] (rows=383325119 width=88) + PTF Operator [PTF_300] (rows=162257387 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] - Select Operator [SEL_311] (rows=383325119 width=88) + Select Operator [SEL_299] (rows=162257387 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + SHUFFLE [RS_297] PartitionCols:_col1, _col0, _col4, _col5 - Please refer to the previous Group By Operator [GBY_307] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] - PartitionCols:_col0, _col1, _col2, _col3, (_col5 + 1) - Select Operator [SEL_319] (rows=383325119 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_318] (rows=383325119 width=88) - predicate:rank_window_0 is not null - PTF Operator [PTF_317] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] - Select Operator [SEL_316] (rows=383325119 width=88) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - PartitionCols:_col1, _col0, _col4, _col5 - Please refer to the previous Group By Operator [GBY_307] + Group By Operator [GBY_296] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_92] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col5, _col6, _col11, _col12 + Merge Join Operator [MERGEJOIN_276] (rows=162257387 width=472) + Conds:RS_88._col2=RS_295._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_294] (rows=1704 width=183) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_293] (rows=1704 width=183) + predicate:(s_company_name is not null and s_store_name is not null and s_store_sk is not null) + TableScan [TS_79] (rows=1704 width=183) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_275] (rows=162257387 width=297) + Conds:RS_85._col1=RS_292._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_291] (rows=462000 width=194) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_290] (rows=462000 width=194) + predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) + TableScan [TS_76] (rows=462000 width=194) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=162257387 width=111) + Conds:RS_289._col0=RS_281._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_281] + PartitionCols:_col0 + Select Operator [SEL_280] (rows=564 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_279] (rows=564 width=12) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) + TableScan [TS_73] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_287] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_70] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + Group By Operator [GBY_283] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_282] (rows=564 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_280] + <-Reducer 9 [ONE_TO_ONE_EDGE] + FORWARD [RS_106] + PartitionCols:_col6, _col7, _col8, _col9, _col14 + Merge Join Operator [MERGEJOIN_277] (rows=241454 width=717) + Conds:RS_307._col0, _col1, _col2, _col3, (_col5 + 1)=RS_318._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col0, _col1, _col2, _col3, (_col5 + 1) + Select Operator [SEL_305] (rows=162257387 width=485) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_303] (rows=162257387 width=489) + predicate:rank_window_0 is not null + PTF Operator [PTF_301] (rows=162257387 width=489) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0, _col1, _col2, _col3, _col8 + Select Operator [SEL_317] (rows=241454 width=605) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_316] (rows=241454 width=605) + predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END + Select Operator [SEL_315] (rows=482909 width=601) + Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_314] (rows=482909 width=601) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) + PTF Operator [PTF_313] (rows=162257387 width=601) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5, _col6"}] + Select 
Operator [SEL_312] (rows=162257387 width=601) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col1, _col0, _col4, _col5 + Select Operator [SEL_310] (rows=162257387 width=489) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + PTF Operator [PTF_309] (rows=162257387 width=489) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col5, _col2"}] + Select Operator [SEL_308] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col1, _col0, _col4, _col5, _col2 + Please refer to the previous Group By Operator [GBY_296] diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index ffb9aa2526..76b4ce1fe1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,15 +143,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -165,103 +165,103 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=31233897 width=88) + Select Operator [SEL_28] (rows=25203 width=86) Output:["_col5"] - Filter Operator [FIL_27] (rows=31233897 width=88) + Filter Operator [FIL_27] (rows=25203 width=86) predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_96] (rows=93701693 width=88) - Conds:RS_24._col3=RS_126._col0(Inner),Output:["_col5","_col7","_col14"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + Merge Join Operator [MERGEJOIN_96] (rows=75613 width=86) + Conds:RS_24._col3=RS_118._col0(Inner),Output:["_col5","_col7","_col14"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] PartitionCols:_col0 - Select 
Operator [SEL_125] (rows=20000000 width=1014) + Select Operator [SEL_117] (rows=3529412 width=187) Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=20000000 width=1014) + Filter Operator [FIL_116] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + TableScan [TS_12] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=85183356 width=88) - Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + Merge Join Operator [MERGEJOIN_95] (rows=856941 width=0) + Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col3","_col5","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_114] (rows=465450 width=385) + Select Operator [SEL_109] (rows=29552 width=184) Output:["_col0"] - Filter Operator [FIL_113] (rows=465450 width=385) + Filter Operator [FIL_108] (rows=29552 width=183) predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + TableScan [TS_9] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=77439413 width=88) - 
Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col5","_col7"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_94] (rows=19008181 width=108) + Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col2","_col3","_col5","_col7"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_106] (rows=36524 width=1119) + Select Operator [SEL_101] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_105] (rows=36524 width=1119) + Filter Operator [FIL_100] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=70399465 width=88) - Conds:RS_99._col0=RS_123._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] + Merge Join Operator [MERGEJOIN_93] (rows=53235296 width=122) + Conds:RS_99._col0=RS_126._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_99] + SHUFFLE [RS_99] PartitionCols:_col0 - Select Operator [SEL_98] (rows=1704 width=1910) + Select Operator [SEL_98] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_97] (rows=1704 width=1910) + Filter Operator [FIL_97] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + TableScan [TS_0] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] PartitionCols:_col3 - Select Operator [SEL_122] (rows=63999513 width=88) + 
Select Operator [SEL_125] (rows=53235296 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Filter Operator [FIL_121] (rows=63999513 width=88) - predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + Filter Operator [FIL_124] (rows=53235296 width=233) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_25_customer_address_ca_address_sk_min) AND DynamicValue(RS_25_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_25_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, 
DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=36524 width=1119) + Select Operator [SEL_111] (rows=29552 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - Group By Operator [GBY_117] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_116] (rows=465450 width=385) + Please refer to the previous Select Operator [SEL_109] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3529412)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + Group By Operator [GBY_120] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3529412)"] + Select Operator [SEL_119] (rows=3529412 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_114] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_117] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=1704 width=1910) + Select Operator [SEL_103] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + Please refer to the previous Select Operator [SEL_101] diff --git a/ql/src/test/results/clientpositive/perf/tez/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/query49.q.out index 
1f3e14d000..6fe8e9aa94 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query49.q.out @@ -300,87 +300,87 @@ Stage-0 Stage-1 Reducer 11 vectorized File Output Operator [FS_310] - Limit [LIM_309] (rows=100 width=101) + Limit [LIM_309] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_308] (rows=5915494 width=101) + Select Operator [SEL_308] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_307] - Select Operator [SEL_306] (rows=5915494 width=101) + Select Operator [SEL_306] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_305] (rows=5915494 width=101) + Group By Operator [GBY_305] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 9 [SIMPLE_EDGE] <-Reducer 24 [CONTAINS] vectorized Reduce Output Operator [RS_351] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_350] (rows=11830988 width=101) + Group By Operator [GBY_350] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_349] (rows=11830988 width=101) + Top N Key Operator [TNK_349] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,sort order:+++++,top n:100 - Select Operator [SEL_348] (rows=8604378 width=88) + Select Operator [SEL_348] (rows=1142 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_347] (rows=8604378 width=88) + Filter Operator [FIL_347] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_346] (rows=12906568 width=88) + PTF Operator [PTF_346] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition 
by:":"0"}] - Select Operator [SEL_345] (rows=12906568 width=88) + Select Operator [SEL_345] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 23 [SIMPLE_EDGE] vectorized SHUFFLE [RS_344] PartitionCols:0 - Select Operator [SEL_343] (rows=12906568 width=88) + Select Operator [SEL_343] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_342] (rows=12906568 width=88) + PTF Operator [PTF_342] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_341] (rows=12906568 width=88) + Select Operator [SEL_341] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 22 [SIMPLE_EDGE] vectorized SHUFFLE [RS_340] PartitionCols:0 - Group By Operator [GBY_339] (rows=12906568 width=88) + Group By Operator [GBY_339] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_89] PartitionCols:_col0 - Group By Operator [GBY_88] (rows=25813137 width=88) + Group By Operator [GBY_88] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_86] (rows=25813137 width=88) + Select Operator [SEL_86] (rows=1673571 width=73) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_237] (rows=25813137 width=88) + Merge Join Operator [MERGEJOIN_237] (rows=1673571 width=73) Conds:RS_83._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col11","_col12"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_338] PartitionCols:_col0, _col1 - Select Operator [SEL_337] (rows=19197050 width=77) + Select Operator [SEL_337] 
(rows=19197050 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_336] (rows=19197050 width=77) + Filter Operator [FIL_336] (rows=19197050 width=119) predicate:((sr_return_amt > 10000) and sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_77] (rows=57591150 width=77) - default@store_returns,sr,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + TableScan [TS_77] (rows=57591150 width=119) + default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_83] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_236] (rows=23466488 width=88) + Merge Join Operator [MERGEJOIN_236] (rows=1673571 width=8) Conds:RS_335._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_267] (rows=18262 width=1119) + Select Operator [SEL_267] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_266] (rows=18262 width=1119) + Filter Operator [FIL_266] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 29 [SIMPLE_EDGE] vectorized SHUFFLE [RS_335] PartitionCols:_col0 - Select Operator [SEL_334] (rows=21333171 width=88) + Select Operator [SEL_334] (rows=61119617 width=229) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_333] (rows=21333171 width=88) + Filter Operator [FIL_333] (rows=61119617 width=229) predicate:((ss_net_paid > 0) and (ss_net_profit > 1) and (ss_quantity > 0) and (ss_sold_date_sk BETWEEN 
DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_ticket_number is not null) - TableScan [TS_71] (rows=575995635 width=88) - default@store_sales,sts,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] + TableScan [TS_71] (rows=575995635 width=229) + default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized BROADCAST [RS_332] Group By Operator [GBY_331] (rows=1 width=12) @@ -389,70 +389,70 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_279] Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_273] (rows=18262 width=1119) + Select Operator [SEL_273] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] <-Reducer 8 [CONTAINS] vectorized Reduce Output Operator [RS_304] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_303] (rows=11830988 width=101) + Group By Operator [GBY_303] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_302] (rows=11830988 width=101) + Top N Key Operator [TNK_302] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,sort order:+++++,top n:100 - Select Operator [SEL_301] (rows=3226610 width=135) + Select Operator [SEL_301] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_300] (rows=3226610 width=135) + Group By Operator [GBY_300] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, 
KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized Reduce Output Operator [RS_330] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_329] (rows=6453220 width=135) + Group By Operator [GBY_329] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_328] (rows=4302070 width=135) + Select Operator [SEL_328] (rows=1134 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_327] (rows=4302070 width=135) + Filter Operator [FIL_327] (rows=1134 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_326] (rows=6453105 width=135) + PTF Operator [PTF_326] (rows=1701 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_325] (rows=6453105 width=135) + Select Operator [SEL_325] (rows=1701 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_324] PartitionCols:0 - Select Operator [SEL_323] (rows=6453105 width=135) + Select Operator [SEL_323] (rows=1701 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_322] (rows=6453105 width=135) + PTF Operator [PTF_322] (rows=1701 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_321] (rows=6453105 width=135) + Select Operator [SEL_321] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized SHUFFLE [RS_320] PartitionCols:0 - Group By Operator [GBY_319] (rows=6453105 width=135) + Group By Operator [GBY_319] (rows=1701 width=244) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col0 - Group By Operator [GBY_49] (rows=12906211 width=135) + Group By Operator [GBY_49] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_47] (rows=12906211 width=135) + Select Operator [SEL_47] (rows=865646 width=188) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_235] (rows=12906211 width=135) + Merge Join Operator [MERGEJOIN_235] (rows=865646 width=188) Conds:RS_44._col1, _col2=RS_318._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col11","_col12"] <-Map 28 [SIMPLE_EDGE] vectorized SHUFFLE [RS_318] PartitionCols:_col0, _col1 - Select Operator [SEL_317] (rows=9599627 width=106) + Select Operator [SEL_317] (rows=9599627 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_316] (rows=9599627 width=106) + Filter Operator [FIL_316] (rows=9599627 width=121) predicate:((cr_return_amount > 10000) and cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_38] (rows=28798881 width=106) - default@catalog_returns,cr,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + TableScan [TS_38] (rows=28798881 width=121) + default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_234] (rows=11732919 width=135) + Merge Join Operator [MERGEJOIN_234] (rows=865646 width=102) Conds:RS_315._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_270] @@ -461,12 +461,12 @@ Stage-0 <-Map 
27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_314] (rows=10666290 width=135) + Select Operator [SEL_314] (rows=31838858 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_313] (rows=10666290 width=135) + Filter Operator [FIL_313] (rows=31838858 width=239) predicate:((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and (cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null and cs_sold_date_sk is not null) - TableScan [TS_32] (rows=287989836 width=135) - default@catalog_sales,cs,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] + TableScan [TS_32] (rows=287989836 width=239) + default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_312] Group By Operator [GBY_311] (rows=1 width=12) @@ -475,58 +475,58 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_278] Group By Operator [GBY_275] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_271] (rows=18262 width=1119) + Select Operator [SEL_271] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] <-Reducer 6 [CONTAINS] vectorized Reduce Output Operator [RS_299] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_298] (rows=6453220 width=135) + Group By Operator [GBY_298] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_297] (rows=2151150 width=135) + Select Operator 
[SEL_297] (rows=1142 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_296] (rows=2151150 width=135) + Filter Operator [FIL_296] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_295] (rows=3226726 width=135) + PTF Operator [PTF_295] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_294] (rows=3226726 width=135) + Select Operator [SEL_294] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_293] PartitionCols:0 - Select Operator [SEL_292] (rows=3226726 width=135) + Select Operator [SEL_292] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_291] (rows=3226726 width=135) + PTF Operator [PTF_291] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_290] (rows=3226726 width=135) + Select Operator [SEL_290] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_289] PartitionCols:0 - Group By Operator [GBY_288] (rows=3226726 width=135) + Group By Operator [GBY_288] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=6453452 width=135) + Group By Operator [GBY_17] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_15] (rows=6453452 
width=135) + Select Operator [SEL_15] (rows=438010 width=177) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_233] (rows=6453452 width=135) + Merge Join Operator [MERGEJOIN_233] (rows=438010 width=177) Conds:RS_12._col1, _col2=RS_287._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col11","_col12"] <-Map 26 [SIMPLE_EDGE] vectorized SHUFFLE [RS_287] PartitionCols:_col0, _col1 - Select Operator [SEL_286] (rows=4799489 width=92) + Select Operator [SEL_286] (rows=4799489 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_285] (rows=4799489 width=92) + Filter Operator [FIL_285] (rows=4799489 width=118) predicate:((wr_return_amt > 10000) and wr_item_sk is not null and wr_order_number is not null) - TableScan [TS_6] (rows=14398467 width=92) - default@web_returns,wr,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] + TableScan [TS_6] (rows=14398467 width=118) + default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_232] (rows=5866775 width=135) + Merge Join Operator [MERGEJOIN_232] (rows=438010 width=122) Conds:RS_284._col0=RS_268._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_268] @@ -535,12 +535,12 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_284] PartitionCols:_col0 - Select Operator [SEL_283] (rows=5333432 width=135) + Select Operator [SEL_283] (rows=15996318 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_282] (rows=5333432 width=135) + Filter Operator [FIL_282] (rows=15996318 width=239) predicate:((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND 
DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] + TableScan [TS_0] (rows=144002668 width=239) + default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized BROADCAST [RS_281] Group By Operator [GBY_280] (rows=1 width=12) @@ -549,7 +549,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_277] Group By Operator [GBY_274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_269] (rows=18262 width=1119) + Select Operator [SEL_269] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] diff --git a/ql/src/test/results/clientpositive/perf/tez/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/query5.q.out index 249411197e..31afb0ef17 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query5.q.out @@ -277,28 +277,22 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 22 <- Reducer 15 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 23 (CONTAINS) -Map 24 <- Union 23 (CONTAINS) -Map 27 <- Reducer 19 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Union 28 (CONTAINS) -Map 29 <- Reducer 32 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 21 <- Reducer 15 (BROADCAST_EDGE), Union 22 (CONTAINS) +Map 23 <- Union 22 (CONTAINS) +Map 25 <- Reducer 19 (BROADCAST_EDGE), Union 26 (CONTAINS) Map 9 <- Union 2 (CONTAINS) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) -Reducer 13 <- Map 25 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE) +Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 10 (SIMPLE_EDGE), Union 28 (SIMPLE_EDGE) -Reducer 17 <- Map 34 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 16 <- Map 10 (SIMPLE_EDGE), Union 26 (SIMPLE_EDGE) +Reducer 17 <- Map 30 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE), Union 26 (CONTAINS) Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 7 <- Union 6 (SIMPLE_EDGE) @@ -309,249 +303,202 @@ 
Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_309] - Limit [LIM_308] (rows=100 width=110) + File Output Operator [FS_304] + Limit [LIM_303] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_307] (rows=1136898901 width=110) + Select Operator [SEL_302] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Select Operator [SEL_305] (rows=1136898901 width=110) + SHUFFLE [RS_301] + Select Operator [SEL_300] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_304] (rows=1136898901 width=110) + Group By Operator [GBY_299] (rows=38846 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_323] + Reduce Output Operator [RS_315] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_322] (rows=2273797803 width=110) + Group By Operator [GBY_314] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_321] (rows=757932601 width=110) + Top N Key Operator [TNK_313] (rows=39721 width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_320] (rows=191657181 width=132) + Select Operator [SEL_312] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_319] (rows=191657181 width=132) + Group By Operator [GBY_311] (rows=38846 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_46] (rows=383314363 width=132) + Group By Operator [GBY_46] 
(rows=46000 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col9 - Merge Join Operator [MERGEJOIN_222] (rows=383314363 width=132) - Conds:RS_42._col0=RS_315._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] + Merge Join Operator [MERGEJOIN_222] (rows=34813117 width=535) + Conds:RS_42._col0=RS_310._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] PartitionCols:_col0 - Select Operator [SEL_314] (rows=46000 width=460) + Select Operator [SEL_309] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_313] (rows=46000 width=460) + Filter Operator [FIL_308] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null - TableScan [TS_36] (rows=46000 width=460) - default@catalog_page,catalog_page,Tbl:COMPLETE,Col:NONE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] + TableScan [TS_36] (rows=46000 width=104) + default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_221] (rows=348467596 width=132) - Conds:Union 23._col1=RS_276._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_221] (rows=34813117 width=438) + Conds:Union 22._col1=RS_276._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_276] PartitionCols:_col0 - Select Operator [SEL_273] (rows=8116 width=1119) + Select Operator [SEL_273] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_272] (rows=8116 width=1119) + Filter Operator [FIL_272] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) - TableScan 
[TS_8] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_341] + TableScan [TS_8] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Union 22 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] vectorized + Reduce Output Operator [RS_328] PartitionCols:_col1 - Select Operator [SEL_340] (rows=287989836 width=135) + Select Operator [SEL_327] (rows=285117694 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_339] (rows=287989836 width=135) - predicate:((cs_catalog_page_sk BETWEEN DynamicValue(RS_43_catalog_page_cp_catalog_page_sk_min) AND DynamicValue(RS_43_catalog_page_cp_catalog_page_sk_max) and in_bloom_filter(cs_catalog_page_sk, DynamicValue(RS_43_catalog_page_cp_catalog_page_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_253] (rows=287989836 width=135) + Filter Operator [FIL_326] (rows=285117694 width=231) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_253] (rows=287989836 width=231) Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_336] - Group By Operator [GBY_335] (rows=1 width=12) + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_284] Group By Operator [GBY_281] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_277] (rows=8116 width=1119) + Select Operator [SEL_277] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_273] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_338] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_316] (rows=46000 width=460) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] - <-Map 24 [CONTAINS] vectorized - Reduce Output Operator [RS_344] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_331] PartitionCols:_col1 - Select Operator [SEL_343] (rows=28798881 width=106) + Select Operator [SEL_330] (rows=28221805 width=451) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_342] (rows=28798881 width=106) + Filter Operator [FIL_329] (rows=28221805 width=227) predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_258] (rows=28798881 width=106) + TableScan [TS_258] (rows=28798881 width=227) Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_334] + Reduce Output Operator [RS_323] PartitionCols:_col0, _col1, _col2 - Group By 
Operator [GBY_333] (rows=2273797803 width=110) + Group By Operator [GBY_322] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_332] (rows=757932601 width=110) + Top N Key Operator [TNK_321] (rows=39721 width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_331] (rows=182955399 width=135) + Select Operator [SEL_320] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_330] (rows=182955399 width=135) + Group By Operator [GBY_319] (rows=53 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_80] PartitionCols:_col0 - Group By Operator [GBY_79] (rows=365910798 width=135) + Group By Operator [GBY_79] (rows=84 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col9 - Merge Join Operator [MERGEJOIN_224] (rows=365910798 width=135) - Conds:RS_75._col0=RS_326._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] - <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_326] + Merge Join Operator [MERGEJOIN_224] (rows=30966059 width=543) + Conds:RS_75._col0=RS_318._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] PartitionCols:_col0 - Select Operator [SEL_325] (rows=84 width=1850) + Select Operator [SEL_317] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_324] (rows=84 width=1850) + Filter Operator [FIL_316] (rows=84 width=104) predicate:web_site_sk is not null - TableScan [TS_69] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_site_id"] + TableScan [TS_69] (rows=84 width=104) + 
default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_223] (rows=332646173 width=135) - Conds:Union 28._col1=RS_278._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_223] (rows=30966059 width=447) + Conds:Union 26._col1=RS_278._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_278] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_273] - <-Union 28 [SIMPLE_EDGE] - <-Map 27 [CONTAINS] vectorized - Reduce Output Operator [RS_352] + <-Union 26 [SIMPLE_EDGE] + <-Map 25 [CONTAINS] vectorized + Reduce Output Operator [RS_336] PartitionCols:_col1 - Select Operator [SEL_351] (rows=144002668 width=135) + Select Operator [SEL_335] (rows=143930874 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_350] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_76_web_site_web_site_sk_min) AND DynamicValue(RS_76_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_76_web_site_web_site_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_263] (rows=144002668 width=135) + Filter Operator [FIL_334] (rows=143930874 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_263] (rows=144002668 width=231) 
Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_348] - Group By Operator [GBY_347] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_327] (rows=84 width=1850) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_325] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) + BROADCAST [RS_333] + Group By Operator [GBY_332] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_285] Group By Operator [GBY_282] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_279] (rows=8116 width=1119) + Select Operator [SEL_279] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_273] - <-Reducer 30 [CONTAINS] + <-Reducer 28 [CONTAINS] Reduce Output Operator [RS_271] PartitionCols:_col1 - Select Operator [SEL_269] (rows=158402938 width=135) + Select Operator [SEL_269] (rows=134782734 width=454) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_268] (rows=158402938 width=135) - Conds:RS_368._col0, _col2=RS_355._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_355] + Merge Join Operator [MERGEJOIN_268] (rows=134782734 width=230) + 
Conds:RS_339._col0, _col2=RS_342._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] + PartitionCols:_col0, _col2 + Select Operator [SEL_338] (rows=143966669 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_337] (rows=143966669 width=11) + predicate:(ws_item_sk is not null and ws_order_number is not null and ws_web_site_sk is not null) + TableScan [TS_54] (rows=144002668 width=11) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] PartitionCols:_col1, _col2 - Select Operator [SEL_354] (rows=14398467 width=92) + Select Operator [SEL_341] (rows=13749816 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_353] (rows=14398467 width=92) + Filter Operator [FIL_340] (rows=13749816 width=225) predicate:(wr_item_sk is not null and wr_order_number is not null and wr_returned_date_sk is not null) - TableScan [TS_57] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_368] - PartitionCols:_col0, _col2 - Select Operator [SEL_367] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_366] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_61_web_returns_wr_item_sk_min) AND DynamicValue(RS_61_web_returns_wr_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_61_web_returns_wr_item_sk_bloom_filter))) and (ws_order_number BETWEEN DynamicValue(RS_61_web_returns_wr_order_number_min) AND DynamicValue(RS_61_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_web_returns_wr_order_number_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_76_web_site_web_site_sk_min) AND 
DynamicValue(RS_76_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_76_web_site_web_site_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_web_site_sk is not null) - TableScan [TS_54] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_349] - Please refer to the previous Group By Operator [GBY_347] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Group By Operator [GBY_362] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14398467)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_360] - Group By Operator [GBY_358] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14398467)"] - Select Operator [SEL_356] (rows=14398467 width=92) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_354] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14398467)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_361] - Group By Operator [GBY_359] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14398467)"] - Select Operator [SEL_357] (rows=14398467 width=92) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_354] + TableScan [TS_57] (rows=14398467 width=225) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] 
vectorized - Reduce Output Operator [RS_303] + Reduce Output Operator [RS_298] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_302] (rows=2273797803 width=110) + Group By Operator [GBY_297] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_301] (rows=757932601 width=110) + Top N Key Operator [TNK_296] (rows=39721 width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_300] (rows=383320021 width=87) + Select Operator [SEL_295] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_299] (rows=383320021 width=87) + Group By Operator [GBY_294] (rows=822 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=766640042 width=87) + Group By Operator [GBY_21] (rows=1704 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col9 - Merge Join Operator [MERGEJOIN_220] (rows=766640042 width=87) - Conds:RS_17._col0=RS_290._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] + Merge Join Operator [MERGEJOIN_220] (rows=64325014 width=376) + Conds:RS_17._col0=RS_293._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + SHUFFLE [RS_293] PartitionCols:_col0 - Select Operator [SEL_289] (rows=1704 width=1910) + Select Operator [SEL_292] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_288] (rows=1704 width=1910) + Filter Operator [FIL_291] (rows=1704 width=104) predicate:s_store_sk is not null - TableScan [TS_11] (rows=1704 width=1910) - 
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] + TableScan [TS_11] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_219] (rows=696945478 width=87) + Merge Join Operator [MERGEJOIN_219] (rows=64325014 width=277) Conds:Union 2._col1=RS_274._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_274] @@ -559,13 +506,13 @@ Stage-0 Please refer to the previous Select Operator [SEL_273] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_298] + Reduce Output Operator [RS_290] PartitionCols:_col1 - Select Operator [SEL_297] (rows=575995635 width=88) + Select Operator [SEL_289] (rows=525329897 width=445) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_296] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_225] (rows=575995635 width=88) + Filter Operator [FIL_288] (rows=525329897 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_225] (rows=575995635 width=221) Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] 
vectorized BROADCAST [RS_287] @@ -575,27 +522,16 @@ Stage-0 SHUFFLE [RS_283] Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_275] (rows=8116 width=1119) + Select Operator [SEL_275] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_273] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] - Group By Operator [GBY_292] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_312] + Reduce Output Operator [RS_307] PartitionCols:_col1 - Select Operator [SEL_311] (rows=57591150 width=77) + Select Operator [SEL_306] (rows=53634860 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_310] (rows=57591150 width=77) + Filter Operator [FIL_305] (rows=53634860 width=223) predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) - TableScan [TS_236] (rows=57591150 width=77) + TableScan [TS_236] (rows=57591150 width=223) Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/query50.q.out index 8cd6f88746..ae6781ba58 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -127,14 +127,12 @@ POSTHOOK: Output: 
hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 12 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) @@ -145,139 +143,117 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_156] - Limit [LIM_155] (rows=100 width=88) + File Output Operator [FS_146] + Limit [LIM_145] (rows=100 width=858) Number of rows:100 - Select Operator [SEL_154] (rows=383325119 width=88) + Select Operator [SEL_144] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] - Group By Operator [GBY_152] (rows=383325119 width=88) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9 - Group By Operator [GBY_29] (rows=766650239 width=88) + Group By Operator [GBY_29] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_56] (rows=766650239 width=88) + Top N Key Operator [TNK_56] (rows=11945216 width=821) keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,sort order:++++++++++,top n:100 - Select Operator [SEL_27] (rows=766650239 width=88) + Select Operator [SEL_27] (rows=11945216 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_120] (rows=766650239 width=88) - Conds:RS_24._col10=RS_143._col0(Inner),Output:["_col0","_col7","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + Merge Join Operator [MERGEJOIN_120] (rows=11945216 width=821) + Conds:RS_24._col10=RS_141._col0(Inner),Output:["_col0","_col7","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_142] (rows=1704 width=1910) + Select Operator [SEL_140] (rows=1704 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_141] (rows=1704 width=1910) + Filter Operator [FIL_139] (rows=1704 width=821) predicate:s_store_sk is not null - TableScan [TS_12] (rows=1704 width=1910) - 
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] + TableScan [TS_12] (rows=1704 width=821) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_119] (rows=696954748 width=88) - Conds:RS_21._col7=RS_135._col0(Inner),Output:["_col0","_col7","_col10"] + Merge Join Operator [MERGEJOIN_119] (rows=11945216 width=3) + Conds:RS_21._col7=RS_138._col0(Inner),Output:["_col0","_col7","_col10"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_134] (rows=73049 width=1119) + Select Operator [SEL_137] (rows=73049 width=4) Output:["_col0"] - Filter Operator [FIL_133] (rows=73049 width=1119) + Filter Operator [FIL_136] (rows=73049 width=4) predicate:d_date_sk is not null - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk"] + TableScan [TS_9] (rows=73049 width=4) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_118] (rows=633595212 width=88) - Conds:RS_18._col1, _col2, _col3=RS_151._col1, _col2, _col4(Inner),Output:["_col0","_col7","_col10"] + Merge Join Operator [MERGEJOIN_118] (rows=11945216 width=3) + Conds:RS_18._col1, _col2, _col3=RS_135._col1, _col2, _col4(Inner),Output:["_col0","_col7","_col10"] <-Reducer 2 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_18] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_117] (rows=63350266 width=77) + Merge Join Operator [MERGEJOIN_117] (rows=1339446 width=8) 
Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_122] (rows=57591150 width=77) + Select Operator [SEL_122] (rows=53632139 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_121] (rows=57591150 width=77) + Filter Operator [FIL_121] (rows=53632139 width=15) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] + TableScan [TS_0] (rows=57591150 width=15) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_125] (rows=18262 width=1119) + Select Operator [SEL_125] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_124] (rows=18262 width=1119) + Filter Operator [FIL_124] (rows=50 width=12) predicate:((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + SHUFFLE [RS_135] PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_150] (rows=575995635 width=88) + Select Operator [SEL_134] (rows=501694138 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_149] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_18_store_returns_sr_customer_sk_min) AND DynamicValue(RS_18_store_returns_sr_customer_sk_max) and 
in_bloom_filter(ss_customer_sk, DynamicValue(RS_18_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_store_returns_sr_item_sk_min) AND DynamicValue(RS_18_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_18_store_returns_sr_ticket_number_min) AND DynamicValue(RS_18_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_18_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] + Filter Operator [FIL_133] (rows=501694138 width=19) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_18_store_returns_sr_customer_sk_min) AND DynamicValue(RS_18_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_18_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_store_returns_sr_item_sk_min) AND DynamicValue(RS_18_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_store_returns_sr_item_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_18_store_returns_sr_ticket_number_min) AND DynamicValue(RS_18_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, 
DynamicValue(RS_18_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_6] (rows=575995635 width=19) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_132] Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_94] Group By Operator [GBY_93] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_92] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_140] - Group By Operator [GBY_139] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - Group By Operator [GBY_137] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_136] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_134] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_148] - Group By Operator [GBY_147] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_144] (rows=1704 width=1910) + Select Operator [SEL_92] (rows=1339446 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_142] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_128] Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_84] Group By Operator [GBY_83] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_82] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_82] (rows=1339446 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_117] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_130] Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_89] Group 
By Operator [GBY_88] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_87] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_87] (rows=1339446 width=0) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_117] diff --git a/ql/src/test/results/clientpositive/perf/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/query51.q.out index 402df6c4ca..23c23df1b7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query51.q.out @@ -115,59 +115,59 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_117] - Limit [LIM_116] (rows=100 width=88) + Limit [LIM_116] (rows=100 width=636) Number of rows:100 - Select Operator [SEL_115] (rows=116159124 width=88) + Select Operator [SEL_115] (rows=363803676 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_114] - Select Operator [SEL_113] (rows=116159124 width=88) + Select Operator [SEL_113] (rows=363803676 width=636) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_112] (rows=116159124 width=88) + Filter Operator [FIL_112] (rows=363803676 width=420) predicate:(max_window_0 > max_window_1) - PTF Operator [PTF_111] (rows=348477374 width=88) + PTF Operator [PTF_111] (rows=1091411029 width=420) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS LAST","partition by:":"CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END"}] - Select Operator [SEL_110] (rows=348477374 width=88) + Select Operator [SEL_110] (rows=1091411029 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 
[SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END - Merge Join Operator [MERGEJOIN_87] (rows=348477374 width=88) + Merge Join Operator [MERGEJOIN_87] (rows=1091411029 width=420) Conds:RS_40._col0, _col1=RS_41._col0, _col1(Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0, _col1 - Select Operator [SEL_37] (rows=79201469 width=135) + Select Operator [SEL_37] (rows=3442937 width=210) Output:["_col0","_col1","_col2"] - PTF Operator [PTF_36] (rows=79201469 width=135) + PTF Operator [PTF_36] (rows=3442937 width=210) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] - Group By Operator [GBY_32] (rows=79201469 width=135) + Group By Operator [GBY_32] (rows=3442937 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=158402938 width=135) + Group By Operator [GBY_30] (rows=24992810 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_86] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_86] (rows=24992810 width=209) Conds:RS_108._col0=RS_92._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_89] (rows=73049 width=1119) + Select Operator [SEL_89] (rows=317 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_88] (rows=73049 width=1119) + Filter Operator [FIL_88] (rows=317 width=102) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] + TableScan [TS_3] (rows=73049 width=102) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_107] (rows=144002668 width=135) + Select Operator [SEL_107] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_106] (rows=144002668 width=135) + Filter Operator [FIL_106] (rows=143966864 width=119) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_27_date_dim_d_date_sk_min) AND DynamicValue(RS_27_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_27_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_20] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_sales_price"] + TableScan [TS_20] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_105] Group By Operator [GBY_104] (rows=1 width=12) @@ -176,24 +176,24 @@ Stage-0 SHUFFLE [RS_97] Group By Operator [GBY_95] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_93] (rows=73049 width=1119) + Select Operator [SEL_93] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_89] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=316797606 width=88) + Select Operator [SEL_17] (rows=3442937 width=210) Output:["_col0","_col1","_col2"] - PTF Operator [PTF_16] (rows=316797606 width=88) + PTF Operator [PTF_16] (rows=3442937 width=210) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}] - Group By Operator [GBY_12] (rows=316797606 width=88) + Group By Operator 
[GBY_12] (rows=3442937 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=633595212 width=88) + Group By Operator [GBY_10] (rows=95493908 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_85] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_85] (rows=95493908 width=180) Conds:RS_102._col0=RS_90._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_90] @@ -202,12 +202,12 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_101] (rows=575995635 width=88) + Select Operator [SEL_101] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_100] (rows=575995635 width=88) + Filter Operator [FIL_100] (rows=550076554 width=114) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_sales_price"] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_99] Group By Operator [GBY_98] (rows=1 width=12) @@ -216,7 +216,7 @@ Stage-0 SHUFFLE [RS_96] Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_91] (rows=73049 width=1119) + Select Operator [SEL_91] (rows=317 width=4) Output:["_col0"] 
Please refer to the previous Select Operator [SEL_89] diff --git a/ql/src/test/results/clientpositive/perf/tez/query52.q.out b/ql/src/test/results/clientpositive/perf/tez/query52.q.out index 5f3e784594..bc932b2dec 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query52.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query52.q.out @@ -65,55 +65,55 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_79] - Select Operator [SEL_78] (rows=100 width=88) + Select Operator [SEL_78] (rows=100 width=220) Output:["_col0","_col1","_col2","_col3"] - Limit [LIM_77] (rows=100 width=88) + Limit [LIM_77] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_76] (rows=348477374 width=88) + Select Operator [SEL_76] (rows=7333 width=216) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_75] - Group By Operator [GBY_74] (rows=348477374 width=88) + Group By Operator [GBY_74] (rows=7333 width=216) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=7333 width=216) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8 - Merge Join Operator [MERGEJOIN_54] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=104) Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col7","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_64] (rows=231000 width=1436) + Select Operator [SEL_64] (rows=7333 width=111) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_63] (rows=231000 width=1436) + Filter Operator [FIL_63] (rows=7333 width=111) predicate:((i_manager_id = 1) and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + TableScan [TS_6] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_53] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_53] (rows=15062131 width=4) Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_56] (rows=18262 width=1119) + Select Operator [SEL_56] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_55] (rows=18262 width=1119) + Filter Operator [FIL_55] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,dt,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,dt,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_73] PartitionCols:_col0 - Select Operator [SEL_72] (rows=575995635 width=88) + Select Operator [SEL_72] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_71] (rows=575995635 width=88) + Filter Operator [FIL_71] (rows=550076554 width=114) predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_dt_d_date_sk_min) AND DynamicValue(RS_10_dt_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_dt_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_62] Group By Operator [GBY_61] (rows=1 width=12) @@ -122,7 +122,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_60] Group By Operator [GBY_59] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_58] (rows=18262 width=1119) + Select Operator [SEL_58] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_56] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -133,7 +133,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_68] Group By Operator [GBY_67] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_66] (rows=231000 width=1436) + Select Operator [SEL_66] (rows=7333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_64] diff --git a/ql/src/test/results/clientpositive/perf/tez/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out index 87857e8cdb..d99529f1a5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query53.q.out @@ -65,9 +65,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -80,78 +79,78 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_116] - Limit [LIM_115] (rows=100 width=88) + File Output Operator [FS_111] + Limit [LIM_110] (rows=30 width=228) Number of rows:100 - Select Operator [SEL_114] (rows=191662559 width=88) + Select Operator [SEL_109] (rows=30 width=228) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=191662559 width=88) + Select Operator [SEL_30] (rows=30 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_46] (rows=191662559 width=88) + Filter Operator [FIL_46] (rows=30 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_29] (rows=383325119 width=88) + Select Operator [SEL_29] (rows=60 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=383325119 width=88) + PTF Operator [PTF_28] (rows=60 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=383325119 width=88) + Select Operator [SEL_25] (rows=60 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=383325119 width=88) + Group By Operator [GBY_24] (rows=60 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=766650239 width=88) + 
Group By Operator [GBY_22] (rows=60 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col11 - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col8","_col11"] + Merge Join Operator [MERGEJOIN_84] (rows=129200 width=8) + Conds:RS_18._col2=RS_106._col0(Inner),Output:["_col3","_col8","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_102] (rows=1704 width=1910) + Select Operator [SEL_105] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_101] (rows=1704 width=1910) + Filter Operator [FIL_104] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + TableScan [TS_9] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_83] (rows=129200 width=8) Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=73049 width=1119) + Select Operator [SEL_94] (rows=317 width=12) Output:["_col0","_col2"] - Filter Operator [FIL_93] (rows=73049 width=1119) + Filter Operator [FIL_93] (rows=317 width=12) predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_qoy"] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_qoy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator 
[MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] + Merge Join Operator [MERGEJOIN_82] (rows=744232 width=4) + Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=462000 width=1436) + Select Operator [SEL_86] (rows=68 width=290) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=462000 width=1436) + Filter Operator [FIL_85] (rows=68 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] + TableScan [TS_3] (rows=462000 width=289) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_103] PartitionCols:_col1 - Select Operator [SEL_110] (rows=575995635 width=88) + Select Operator [SEL_102] (rows=525329897 width=118) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + Filter Operator [FIL_101] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_100] Group By Operator [GBY_99] (rows=1 width=12) @@ -160,20 +159,9 @@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=73049 width=1119) + Select Operator [SEL_96] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_92] Group By Operator [GBY_91] (rows=1 width=12) @@ -182,7 +170,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=462000 width=1436) + Select Operator [SEL_88] (rows=68 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index 1c17d2a53a..8e100a6051 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -133,13 +133,13 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Union 18 (CONTAINS) +Map 1 <- Reducer 15 (BROADCAST_EDGE) +Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS) Map 23 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) Reducer 19 <- Map 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) @@ -147,17 +147,16 @@ Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Reducer 21 (SIMPLE_EDGE) Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 30 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 30 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 29 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 29 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 
(CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 29 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -166,288 +165,277 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_360] - Limit [LIM_359] (rows=100 width=158) + File Output Operator [FS_354] + Limit [LIM_353] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_358] (rows=1614130953450400 width=158) + Select Operator [SEL_352] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - Select Operator [SEL_356] (rows=1614130953450400 width=158) + SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_355] (rows=1614130953450400 width=158) + Group By Operator [GBY_349] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] + SHUFFLE [RS_348] PartitionCols:_col0 - Group By Operator [GBY_353] (rows=3228261906900801 width=158) + Group By Operator [GBY_347] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_352] (rows=3228261906900801 width=158) + Select Operator [SEL_346] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_351] (rows=3228261906900801 width=158) + Group By Operator [GBY_345] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_119] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=6456523813801603 width=158) + Group By Operator [GBY_118] (rows=312 width=116) 
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_117] (rows=6456523813801603 width=158) + Select Operator [SEL_117] (rows=624257222 width=127) Output:["_col0","_col1"] - Filter Operator [FIL_116] (rows=6456523813801603 width=158) + Filter Operator [FIL_116] (rows=624257222 width=127) predicate:_col2 BETWEEN _col3 AND _col4 - Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) + Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_350] - Group By Operator [GBY_349] (rows=9131 width=1119) + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_343] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] PartitionCols:_col0 - Group By Operator [GBY_328] (rows=18262 width=1119) + Group By Operator [GBY_322] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_325] (rows=18262 width=1119) + Select Operator [SEL_319] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_322] (rows=18262 width=1119) + Filter Operator [FIL_316] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_50] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + TableScan [TS_50] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_113] - Select Operator [SEL_104] (rows=6363893803988 width=1226) + Select Operator [SEL_104] (rows=224732600 width=119) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1226) + Merge Join Operator [MERGEJOIN_272] (rows=224732600 
width=119) Conds:(Inner),Output:["_col0","_col4","_col11","_col13"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=8) - Filter Operator [FIL_346] (rows=1 width=8) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Select Operator [SEL_341] (rows=1 width=8) + Filter Operator [FIL_340] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_345] (rows=1 width=8) + Group By Operator [GBY_339] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_343] (rows=1 width=8) + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Group By Operator [GBY_337] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_342] (rows=9131 width=1119) - Group By Operator [GBY_341] (rows=9131 width=1119) + Select Operator [SEL_336] (rows=25 width=4) + Group By Operator [GBY_335] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0 - Group By Operator [GBY_327] (rows=18262 width=1119) + Group By Operator [GBY_321] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_324] (rows=18262 width=1119) + Select Operator [SEL_318] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_322] + Please refer to the previous Filter Operator [FIL_316] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_101] - Select Operator [SEL_85] (rows=6363893803988 width=1217) + Select Operator [SEL_85] (rows=224732600 width=119) Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_271] (rows=6363893803988 width=1217) + Merge Join Operator [MERGEJOIN_271] (rows=224732600 width=119) Conds:(Left 
Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_332] (rows=9131 width=1119) + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_328] + Group By Operator [GBY_326] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0 - Group By Operator [GBY_326] (rows=18262 width=1119) + Group By Operator [GBY_320] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_323] (rows=18262 width=1119) + Select Operator [SEL_317] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_322] + Please refer to the previous Filter Operator [FIL_316] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_82] - Merge Join Operator [MERGEJOIN_270] (rows=696954748 width=97) + Merge Join Operator [MERGEJOIN_270] (rows=8989304 width=8) Conds:(Inner),Output:["_col2","_col4","_col10"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_79] - Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) - Conds:RS_46._col0=RS_321._col1(Inner),Output:["_col5"] - <-Reducer 14 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_268] (rows=55046 width=4) + Conds:RS_46._col0=RS_307._col1(Inner),Output:["_col5"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) - Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] + Merge Join Operator [MERGEJOIN_264] 
(rows=39720279 width=4) + Conds:RS_286._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col0"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] PartitionCols:_col1, _col2 - Select Operator [SEL_296] (rows=40000000 width=1014) + Select Operator [SEL_285] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_295] (rows=40000000 width=1014) + Filter Operator [FIL_284] (rows=40000000 width=188) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] + TableScan [TS_6] (rows=40000000 width=188) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] + SHUFFLE [RS_289] PartitionCols:_col0, _col1 - Select Operator [SEL_299] (rows=1704 width=1910) + Select Operator [SEL_288] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_298] (rows=1704 width=1910) + Filter Operator [FIL_287] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] + TableScan [TS_9] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_307] PartitionCols:_col1 - Select Operator [SEL_320] (rows=287491029 width=135) + Select Operator [SEL_306] (rows=55046 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_319] (rows=287491029 width=135) + Group By Operator [GBY_305] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0, _col1 - Group By Operator [GBY_39] (rows=574982058 width=135) + Group By Operator [GBY_39] 
(rows=55046 width=8) Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) - Conds:RS_35._col1=RS_315._col0(Inner),Output:["_col9","_col10"] + Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) + Conds:RS_35._col1=RS_304._col0(Inner),Output:["_col9","_col10"] <-Map 28 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] + SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_314] (rows=80000000 width=860) + Select Operator [SEL_303] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_313] (rows=80000000 width=860) + Filter Operator [FIL_302] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_26] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + TableScan [TS_26] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) - Conds:RS_32._col2=RS_309._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) + Conds:RS_32._col2=RS_298._col0(Inner),Output:["_col1"] <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] + PARTITION_ONLY_SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_308] (rows=115500 width=1436) + Select Operator [SEL_297] (rows=453 width=190) Output:["_col0"] - Filter Operator [FIL_307] (rows=115500 width=1436) + Filter Operator [FIL_296] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_23] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + TableScan [TS_23] (rows=462000 width=186) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) - Conds:Union 18._col0=RS_303._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) + Conds:Union 18._col0=RS_292._col0(Inner),Output:["_col1","_col2"] <-Map 24 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] + PARTITION_ONLY_SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_302] (rows=18262 width=1119) + Select Operator [SEL_291] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_301] (rows=18262 width=1119) + Filter Operator [FIL_290] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_20] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_20] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Union 18 [SIMPLE_EDGE] <-Map 17 [CONTAINS] vectorized - Reduce Output Operator [RS_371] + Reduce Output Operator [RS_363] PartitionCols:_col0 - Select Operator [SEL_370] (rows=287989836 width=135) + Select Operator [SEL_362] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_369] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_36_customer_c_customer_sk_min) AND DynamicValue(RS_36_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_36_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and 
in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_274] (rows=287989836 width=135) + Filter Operator [FIL_361] (rows=285117831 width=11) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_274] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) + BROADCAST [RS_356] + Group By Operator [GBY_355] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_295] + Group By Operator [GBY_294] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=18262 width=1119) + Select Operator [SEL_293] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_302] + Please refer to the previous Select Operator [SEL_291] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) + BROADCAST [RS_359] + Group By Operator [GBY_358] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_301] + Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_310] (rows=115500 width=1436) + Select Operator [SEL_299] (rows=453 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_368] - Group By Operator [GBY_367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_316] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] + Please refer to the previous Select Operator [SEL_297] <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_374] + Reduce Output Operator [RS_366] PartitionCols:_col0 - Select Operator [SEL_373] (rows=144002668 width=135) + Select Operator [SEL_365] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_372] (rows=144002668 width=135) + Filter Operator [FIL_364] (rows=143930993 width=11) predicate:((ws_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND 
DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_279] (rows=144002668 width=135) + TableScan [TS_279] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Please refer to the previous Group By Operator [GBY_361] + BROADCAST [RS_357] + Please refer to the previous Group By Operator [GBY_355] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Please refer to the previous Group By Operator [GBY_364] + BROADCAST [RS_360] + Please refer to the previous Group By Operator [GBY_358] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_76] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) - Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] - PartitionCols:_col0 - Select Operator [SEL_285] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_284] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) + Conds:RS_312._col0=RS_315._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] + SHUFFLE [RS_312] PartitionCols:_col0 - Select Operator [SEL_293] (rows=575995635 width=88) + Select Operator [SEL_311] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_292] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) + Filter Operator [FIL_310] (rows=525327388 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_77_customer_c_customer_sk_min) AND DynamicValue(RS_77_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_77_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_309] + Group By Operator [GBY_308] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_287] (rows=73049 width=1119) + Select Operator [SEL_181] (rows=55046 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_285] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Select Operator [SEL_339] (rows=1 width=8) - Filter Operator [FIL_338] (rows=1 width=8) + Please refer to the previous Merge Join Operator [MERGEJOIN_268] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] + PartitionCols:_col0 + Select 
Operator [SEL_314] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_313] (rows=73049 width=8) + predicate:d_date_sk is not null + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + Select Operator [SEL_333] (rows=1 width=8) + Filter Operator [FIL_332] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_337] (rows=1 width=8) + Group By Operator [GBY_331] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_335] (rows=1 width=8) + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_330] + Group By Operator [GBY_329] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_333] (rows=9131 width=1119) - Please refer to the previous Group By Operator [GBY_332] + Select Operator [SEL_327] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_326] diff --git a/ql/src/test/results/clientpositive/perf/tez/query55.q.out b/ql/src/test/results/clientpositive/perf/tez/query55.q.out index a942c8fcd7..ec30b92cc6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query55.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query55.q.out @@ -49,55 +49,55 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_79] - Limit [LIM_78] (rows=100 width=88) + Limit [LIM_78] (rows=100 width=220) Number of rows:100 - Select Operator [SEL_77] (rows=348477374 width=88) + Select Operator [SEL_77] (rows=7333 width=220) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_76] - Select Operator [SEL_75] (rows=348477374 width=88) + Select Operator [SEL_75] (rows=7333 width=220) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_74] (rows=348477374 
width=88) + Group By Operator [GBY_74] (rows=7333 width=216) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=7333 width=216) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8 - Merge Join Operator [MERGEJOIN_54] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_54] (rows=2301098 width=104) Conds:RS_12._col1=RS_65._col0(Inner),Output:["_col2","_col7","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_64] (rows=231000 width=1436) + Select Operator [SEL_64] (rows=7333 width=111) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_63] (rows=231000 width=1436) + Filter Operator [FIL_63] (rows=7333 width=111) predicate:((i_manager_id = 36) and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + TableScan [TS_6] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_53] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_53] (rows=15062131 width=4) Conds:RS_73._col0=RS_57._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_56] (rows=18262 width=1119) + Select Operator [SEL_56] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_55] (rows=18262 width=1119) + Filter Operator [FIL_55] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_73] PartitionCols:_col0 - Select Operator [SEL_72] (rows=575995635 width=88) + Select Operator [SEL_72] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_71] (rows=575995635 width=88) + Filter Operator [FIL_71] (rows=550076554 width=114) predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_62] Group By Operator [GBY_61] (rows=1 width=12) @@ -106,7 +106,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_60] Group By Operator [GBY_59] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_58] (rows=18262 width=1119) + Select Operator [SEL_58] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_56] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -117,7 +117,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_68] Group By Operator [GBY_67] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_66] (rows=231000 width=1436) + Select Operator [SEL_66] (rows=7333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_64] diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out index 1024ba8fd1..a6d3090c08 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out @@ -184,98 +184,98 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_370] - Limit [LIM_369] (rows=100 width=108) + Limit [LIM_369] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_368] (rows=335408073 width=108) + Select Operator [SEL_368] (rows=430 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_367] - Group By Operator [GBY_366] (rows=335408073 width=108) + Group By Operator [GBY_366] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] vectorized Reduce Output Operator [RS_382] PartitionCols:_col0 - Group By Operator [GBY_381] (rows=670816147 width=108) + Group By Operator [GBY_381] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_380] (rows=191657247 width=135) + Group By Operator [GBY_380] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=383314495 width=135) + Group By Operator [GBY_70] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_303] (rows=373066 width=100) 
Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_293] (rows=508200 width=1436) + Merge Join Operator [MERGEJOIN_293] (rows=17170 width=104) Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_319] PartitionCols:_col1 - Select Operator [SEL_318] (rows=462000 width=1436) + Select Operator [SEL_318] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_317] (rows=462000 width=1436) + Filter Operator [FIL_317] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) - TableScan [TS_0] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_325] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=231000 width=1436) + Group By Operator [GBY_324] (rows=11550 width=100) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_323] PartitionCols:_col0 - Group By Operator [GBY_322] (rows=462000 width=1436) + Group By Operator [GBY_322] (rows=11550 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_321] (rows=462000 width=1436) + Select Operator [SEL_321] (rows=23100 width=189) Output:["i_item_id"] - Filter Operator [FIL_320] (rows=462000 width=1436) + Filter Operator [FIL_320] (rows=23100 width=189) predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"] + TableScan [TS_3] (rows=462000 width=189) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col4 - Select Operator [SEL_62] 
(rows=348467716 width=135) + Select Operator [SEL_62] (rows=1550375 width=13) Output:["_col4","_col5"] - Merge Join Operator [MERGEJOIN_298] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_298] (rows=1550375 width=13) Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_346] PartitionCols:_col0 - Select Operator [SEL_343] (rows=20000000 width=1014) + Select Operator [SEL_343] (rows=8000000 width=116) Output:["_col0"] - Filter Operator [FIL_342] (rows=20000000 width=1014) + Filter Operator [FIL_342] (rows=8000000 width=112) predicate:((ca_gmt_offset = -8) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] + TableScan [TS_16] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_297] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_297] (rows=7751872 width=98) Conds:RS_379._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_330] PartitionCols:_col0 - Select Operator [SEL_327] (rows=18262 width=1119) + Select Operator [SEL_327] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_326] (rows=18262 width=1119) + Filter Operator [FIL_326] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 32 [SIMPLE_EDGE] vectorized SHUFFLE [RS_379] PartitionCols:_col0 - Select Operator [SEL_378] (rows=287989836 
width=135) + Select Operator [SEL_378] (rows=285117733 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_377] (rows=287989836 width=135) + Filter Operator [FIL_377] (rows=285117733 width=123) predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_60_customer_address_ca_address_sk_min) AND DynamicValue(RS_60_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_60_customer_address_ca_address_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_66_item_i_item_sk_min) AND DynamicValue(RS_66_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_66_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_47] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + TableScan [TS_47] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_376] Group By Operator [GBY_375] (rows=1 width=12) @@ -284,7 +284,7 @@ Stage-0 SHUFFLE [RS_240] Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_238] (rows=508200 width=1436) + Select Operator [SEL_238] (rows=17170 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] <-Reducer 24 [BROADCAST_EDGE] vectorized @@ -295,33 +295,33 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_338] Group By Operator [GBY_335] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_331] (rows=18262 width=1119) + Select Operator [SEL_331] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_327] <-Reducer 30 [BROADCAST_EDGE] vectorized BROADCAST [RS_374] Group By Operator [GBY_373] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_354] Group By Operator [GBY_351] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_347] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_347] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_343] <-Reducer 13 [CONTAINS] vectorized Reduce Output Operator [RS_394] PartitionCols:_col0 - Group By Operator [GBY_393] (rows=670816147 width=108) + Group By Operator [GBY_393] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_392] (rows=95833781 width=135) + Group By Operator [GBY_392] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=191667562 width=135) + Group By Operator [GBY_108] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=191667562 width=135) + Merge Join Operator 
[MERGEJOIN_304] (rows=189670 width=190) Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] @@ -330,9 +330,9 @@ Stage-0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col3 - Select Operator [SEL_100] (rows=174243235 width=135) + Select Operator [SEL_100] (rows=788222 width=110) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_301] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_301] (rows=788222 width=110) Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_348] @@ -341,7 +341,7 @@ Stage-0 <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_300] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_300] (rows=3941109 width=118) Conds:RS_391._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_332] @@ -350,12 +350,12 @@ Stage-0 <-Map 33 [SIMPLE_EDGE] vectorized SHUFFLE [RS_391] PartitionCols:_col0 - Select Operator [SEL_390] (rows=144002668 width=135) + Select Operator [SEL_390] (rows=143931246 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_389] (rows=144002668 width=135) + Filter Operator [FIL_389] (rows=143931246 width=123) predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_98_customer_address_ca_address_sk_min) AND DynamicValue(RS_98_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_98_customer_address_ca_address_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) 
and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_85] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + TableScan [TS_85] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized BROADCAST [RS_388] Group By Operator [GBY_387] (rows=1 width=12) @@ -364,7 +364,7 @@ Stage-0 SHUFFLE [RS_280] Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=508200 width=1436) + Select Operator [SEL_278] (rows=17170 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -375,33 +375,33 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_339] Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_333] (rows=18262 width=1119) + Select Operator [SEL_333] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_327] <-Reducer 31 [BROADCAST_EDGE] vectorized BROADCAST [RS_386] Group By Operator [GBY_385] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_355] Group By Operator [GBY_352] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=20000000)"] - Select Operator [SEL_349] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_343] <-Reducer 4 [CONTAINS] vectorized Reduce Output Operator [RS_365] PartitionCols:_col0 - Group By Operator [GBY_364] (rows=670816147 width=108) + Group By Operator [GBY_364] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_363] (rows=383325119 width=88) + Group By Operator [GBY_363] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=766650239 width=88) + Group By Operator [GBY_33] (rows=430 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_302] (rows=692265 width=100) Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -410,9 +410,9 @@ Stage-0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Select Operator [SEL_25] (rows=696954748 width=88) + Select Operator [SEL_25] (rows=2876890 width=4) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_295] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_295] (rows=2876890 width=4) Conds:RS_22._col2=RS_344._col0(Inner),Output:["_col1","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_344] @@ -421,7 +421,7 @@ Stage-0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_294] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_294] (rows=14384447 width=4) Conds:RS_362._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 
[SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_328] @@ -430,12 +430,12 @@ Stage-0 <-Map 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_362] PartitionCols:_col0 - Select Operator [SEL_361] (rows=575995635 width=88) + Select Operator [SEL_361] (rows=525327191 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_360] (rows=575995635 width=88) + Filter Operator [FIL_360] (rows=525327191 width=118) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_23_customer_address_ca_address_sk_min) AND DynamicValue(RS_23_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_23_customer_address_ca_address_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_item_i_item_sk_min) AND DynamicValue(RS_29_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + TableScan [TS_10] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_341] Group By Operator [GBY_340] (rows=1 width=12) @@ -444,18 +444,18 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_337] Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_329] (rows=18262 width=1119) + Select Operator [SEL_329] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator 
[SEL_327] <-Reducer 29 [BROADCAST_EDGE] vectorized BROADCAST [RS_357] Group By Operator [GBY_356] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_353] Group By Operator [GBY_350] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_345] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_345] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_343] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -466,7 +466,7 @@ Stage-0 SHUFFLE [RS_200] Group By Operator [GBY_199] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=508200 width=1436) + Select Operator [SEL_198] (rows=17170 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_293] diff --git a/ql/src/test/results/clientpositive/perf/tez/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out index 7299f9f698..39b95108e2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -105,180 +105,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE) Reducer 10 <- Reducer 5 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_334] - Limit [LIM_333] (rows=100 width=135) + File Output Operator [FS_321] + Limit [LIM_320] (rows=100 width=758) Number of rows:100 - Select Operator [SEL_332] (rows=421645952 width=135) + Select Operator [SEL_319] (rows=130121 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_108] - Select Operator [SEL_107] (rows=421645952 width=135) + SHUFFLE [RS_110] + Select Operator [SEL_109] (rows=130121 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_279] (rows=421645952 width=135) - Conds:RS_320._col0, _col1, _col2, (_col4 + 1)=RS_331._col0, _col1, _col2, _col7(Inner),RS_331._col0, _col1, _col2, _col7=RS_315._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] - 
PartitionCols:_col0, _col1, _col2, _col7 - Select Operator [SEL_330] (rows=15971437 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_329] (rows=15971437 width=135) - predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END - Select Operator [SEL_328] (rows=31942874 width=135) - Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_327] (rows=31942874 width=135) - predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) - PTF Operator [PTF_326] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5"}] - Select Operator [SEL_325] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] - PartitionCols:_col1, _col0, _col4 - Select Operator [SEL_323] (rows=191657247 width=135) - Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_322] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col2"}] - Select Operator [SEL_321] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - PartitionCols:_col1, _col0, _col4, _col2 - Group By Operator [GBY_307] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_92] (rows=383314495 width=135) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col10, _col11, _col5, _col6, _col8 - Merge Join Operator [MERGEJOIN_278] (rows=383314495 width=135) - Conds:RS_88._col2=RS_298._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] - PartitionCols:_col0 - Select Operator [SEL_297] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_296] (rows=462000 width=1436) - predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) - TableScan [TS_79] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_277] (rows=348467716 width=135) - Conds:RS_85._col1=RS_290._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=60 width=2045) - Output:["_col0","_col1"] - Filter Operator [FIL_288] (rows=60 width=2045) - predicate:(cc_call_center_sk is not null and cc_name is not null) - TableScan [TS_76] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=316788826 width=135) - Conds:RS_306._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] - PartitionCols:_col0 - Select Operator [SEL_281] (rows=73049 width=1119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=73049 width=1119) - predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 
width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_305] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_304] (rows=287989836 width=135) - predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_86_call_center_cc_call_center_sk_min) AND DynamicValue(RS_86_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_86_call_center_cc_call_center_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_89_item_i_item_sk_min) AND DynamicValue(RS_89_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_89_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_70] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] - Group By Operator [GBY_292] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=60 width=2045) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_303] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_299] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_297] + Merge Join Operator [MERGEJOIN_278] (rows=130121 width=646) + Conds:RS_106._col5, _col6, _col7, _col12=RS_306._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE [RS_306] PartitionCols:_col0, _col1, _col2, (_col4 - 1) - Select Operator [SEL_314] (rows=191657247 width=135) + Select Operator [SEL_304] (rows=87441185 width=404) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_313] (rows=191657247 width=135) + Filter Operator [FIL_302] (rows=87441185 width=408) predicate:rank_window_0 is not null - PTF Operator [PTF_312] (rows=191657247 width=135) + PTF Operator [PTF_300] (rows=87441185 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition 
by:":"_col1, _col0, _col4"}] - Select Operator [SEL_311] (rows=191657247 width=135) + Select Operator [SEL_299] (rows=87441185 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + SHUFFLE [RS_297] PartitionCols:_col1, _col0, _col4 - Please refer to the previous Group By Operator [GBY_307] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] - PartitionCols:_col0, _col1, _col2, (_col4 + 1) - Select Operator [SEL_319] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_318] (rows=191657247 width=135) - predicate:rank_window_0 is not null - PTF Operator [PTF_317] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] - Select Operator [SEL_316] (rows=191657247 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - PartitionCols:_col1, _col0, _col4 - Please refer to the previous Group By Operator [GBY_307] + Group By Operator [GBY_296] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_92] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col10, _col11, _col5, _col6, _col8 + Merge Join Operator [MERGEJOIN_276] (rows=87441185 width=406) + Conds:RS_88._col2=RS_295._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_294] (rows=462000 width=194) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_293] 
(rows=462000 width=194) + predicate:(i_brand is not null and i_category is not null and i_item_sk is not null) + TableScan [TS_79] (rows=462000 width=194) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_275] (rows=87441185 width=220) + Conds:RS_85._col1=RS_292._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] + PartitionCols:_col0 + Select Operator [SEL_291] (rows=60 width=102) + Output:["_col0","_col1"] + Filter Operator [FIL_290] (rows=60 width=102) + predicate:(cc_call_center_sk is not null and cc_name is not null) + TableScan [TS_76] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=87441185 width=126) + Conds:RS_289._col0=RS_281._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_281] + PartitionCols:_col0 + Select Operator [SEL_280] (rows=564 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_279] (rows=564 width=12) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) + TableScan [TS_73] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=285117980 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_287] (rows=285117980 width=123) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_70] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + Group By Operator [GBY_283] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_282] (rows=564 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_280] + <-Reducer 9 [ONE_TO_ONE_EDGE] + FORWARD [RS_106] + PartitionCols:_col5, _col6, _col7, _col12 + Merge Join Operator [MERGEJOIN_277] (rows=130121 width=636) + Conds:RS_307._col0, _col1, _col2, (_col4 + 1)=RS_318._col0, _col1, _col2, _col7(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col0, _col1, _col2, (_col4 + 1) + Select Operator [SEL_305] (rows=87441185 width=404) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_303] (rows=87441185 width=408) + predicate:rank_window_0 is not null + PTF Operator [PTF_301] (rows=87441185 width=408) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0, _col1, _col2, _col7 + Select Operator [SEL_317] (rows=130121 width=524) 
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_316] (rows=130121 width=524) + predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END + Select Operator [SEL_315] (rows=260242 width=520) + Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_314] (rows=260242 width=520) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) + PTF Operator [PTF_313] (rows=87441185 width=520) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5"}] + Select Operator [SEL_312] (rows=87441185 width=520) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col1, _col0, _col4 + Select Operator [SEL_310] (rows=87441185 width=408) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_309] (rows=87441185 width=408) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col2"}] + Select Operator [SEL_308] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col1, _col0, _col4, _col2 + Please refer to the previous Group By Operator [GBY_296] diff --git a/ql/src/test/results/clientpositive/perf/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tez/query58.q.out index f60e32033e..9e71fc0a54 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query58.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[403][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 24' is a cross product 
+Warning: Shuffle Join MERGEJOIN[404][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -142,280 +142,252 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 19 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 28 <- Reducer 12 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) -Map 29 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) -Reducer 10 <- Reducer 18 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 1 <- Reducer 17 (BROADCAST_EDGE) +Map 26 <- Reducer 18 (BROADCAST_EDGE) +Map 27 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 29 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 26 (ONE_TO_ONE_EDGE) -Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 27 (CUSTOM_SIMPLE_EDGE), Reducer 23 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 27 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 12 <- Map 27 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 24 (ONE_TO_ONE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 
(SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 25 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 28 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 26 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_480] - Limit [LIM_479] (rows=100 width=88) + Reducer 7 vectorized + File Output Operator [FS_467] + Limit [LIM_466] (rows=1 width=884) Number of rows:100 - Select Operator [SEL_478] (rows=1442 width=88) + Select Operator [SEL_465] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_161] - Select Operator [SEL_160] (rows=1442 width=88) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_163] + Select Operator [SEL_162] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_154] (rows=1442 width=88) - predicate:(_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col3 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3)) - Merge Join Operator [MERGEJOIN_417] (rows=766650239 width=88) - 
Conds:RS_459._col0=RS_468._col0(Inner),RS_459._col0=RS_477._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_468] + Filter Operator [FIL_158] (rows=1 width=436) + predicate:(_col1 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col3 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3)) + Merge Join Operator [MERGEJOIN_419] (rows=1 width=436) + Conds:RS_155._col0=RS_464._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_464] PartitionCols:_col0 - Group By Operator [GBY_467] (rows=348477374 width=88) + Group By Operator [GBY_463] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_97] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_147] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=696954748 width=88) + Group By Operator [GBY_146] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_415] (rows=696954748 width=88) - Conds:RS_92._col0=RS_93._col0(Inner),Output:["_col2","_col4"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_93] + Merge Join Operator [MERGEJOIN_417] (rows=31537 width=100) + Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_143] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_405] (rows=80353 width=1119) - Conds:RS_436._col1=RS_452._col0(Inner),Output:["_col0"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_436] + Merge Join Operator [MERGEJOIN_406] (rows=2 width=4) + Conds:RS_422._col1=RS_438._col0(Inner),Output:["_col0"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_422] PartitionCols:_col1 - Select Operator [SEL_435] (rows=73049 width=1119) + Select Operator [SEL_421] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator 
[FIL_434] (rows=73049 width=1119) + Filter Operator [FIL_420] (rows=73049 width=98) predicate:(d_date is not null and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 26 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_452] + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 24 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_438] PartitionCols:_col0 - Group By Operator [GBY_451] (rows=40176 width=1119) + Group By Operator [GBY_437] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 - <-Reducer 25 [SIMPLE_EDGE] + <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col0 - Group By Operator [GBY_31] (rows=80353 width=1119) + Group By Operator [GBY_31] (rows=2 width=94) Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_404] (rows=80353 width=1119) - Conds:RS_27._col1=RS_449._col1(Inner),Output:["_col2"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_449] + Merge Join Operator [MERGEJOIN_405] (rows=5 width=94) + Conds:RS_27._col1=RS_435._col1(Inner),Output:["_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_435] PartitionCols:_col1 - Select Operator [SEL_447] (rows=73049 width=1119) + Select Operator [SEL_433] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_445] (rows=73049 width=1119) + Filter Operator [FIL_431] (rows=73049 width=98) predicate:(d_date is not null and d_week_seq is not null) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 24 [SIMPLE_EDGE] + TableScan [TS_21] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_403] (rows=36524 width=1128) + Merge Join Operator [MERGEJOIN_404] (rows=1 
width=4) Conds:(Inner),Output:["_col1"] - <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_450] - Select Operator [SEL_448] (rows=36524 width=1119) + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_436] + Select Operator [SEL_434] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_446] (rows=36524 width=1119) + Filter Operator [FIL_432] (rows=1 width=98) predicate:((d_date = '1998-02-19') and d_week_seq is not null) Please refer to the previous TableScan [TS_21] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_444] - Select Operator [SEL_443] (rows=1 width=8) - Filter Operator [FIL_442] (rows=1 width=8) + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_430] + Select Operator [SEL_429] (rows=1 width=8) + Filter Operator [FIL_428] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_441] (rows=1 width=8) + Group By Operator [GBY_427] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_440] - Group By Operator [GBY_439] (rows=1 width=8) + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_426] + Group By Operator [GBY_425] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_438] (rows=36524 width=1119) - Filter Operator [FIL_437] (rows=36524 width=1119) + Select Operator [SEL_424] (rows=1 width=94) + Filter Operator [FIL_423] (rows=1 width=94) predicate:(d_date = '1998-02-19') - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_92] + TableScan [TS_9] (rows=73049 width=94) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_142] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_406] (rows=633595212 width=88) - 
Conds:RS_466._col1=RS_422._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_422] + Merge Join Operator [MERGEJOIN_411] (rows=143966864 width=215) + Conds:RS_462._col1=RS_448._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_448] PartitionCols:_col0 - Select Operator [SEL_419] (rows=462000 width=1436) + Select Operator [SEL_445] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_418] (rows=462000 width=1436) + Filter Operator [FIL_444] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_466] - PartitionCols:_col1 - Select Operator [SEL_465] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_464] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_90_item_i_item_sk_min) AND DynamicValue(RS_90_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_90_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_93_date_dim_d_date_sk_min) AND DynamicValue(RS_93_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_93_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_50] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_461] - Group By Operator [GBY_460] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_430] - Group By Operator [GBY_427] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_423] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_463] - Group By Operator [GBY_462] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_321] - Group By Operator [GBY_320] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_319] (rows=80353 width=1119) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_405] - <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_477] - PartitionCols:_col0 - Group By Operator [GBY_476] (rows=87121617 width=135) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_147] - PartitionCols:_col0 - Group By Operator [GBY_146] (rows=174243235 width=135) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_416] (rows=174243235 width=135) - Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col2","_col4"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_143] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_405] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_142] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_410] (rows=158402938 width=135) - Conds:RS_475._col1=RS_424._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_424] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_475] + TableScan [TS_3] (rows=462000 width=104) + 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_462] PartitionCols:_col1 - Select Operator [SEL_474] (rows=144002668 width=135) + Select Operator [SEL_461] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_473] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_140_item_i_item_sk_min) AND DynamicValue(RS_140_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_140_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_143_date_dim_d_date_sk_min) AND DynamicValue(RS_143_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_143_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_100] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_470] - Group By Operator [GBY_469] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_431] - Group By Operator [GBY_428] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_425] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_472] - Group By Operator [GBY_471] (rows=1 width=12) + Filter Operator [FIL_460] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_143_date_dim_d_date_sk_min) AND DynamicValue(RS_143_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_143_date_dim_d_date_sk_bloom_filter))) and 
ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_100] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_459] + Group By Operator [GBY_458] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_367] - Group By Operator [GBY_366] (rows=1 width=12) + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_363] + Group By Operator [GBY_362] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_365] (rows=80353 width=1119) + Select Operator [SEL_361] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_405] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_459] + Please refer to the previous Merge Join Operator [MERGEJOIN_406] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_155] PartitionCols:_col0 - Group By Operator [GBY_458] (rows=174233858 width=135) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col0 - Group By Operator [GBY_46] (rows=348467716 width=135) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_414] (rows=348467716 width=135) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col2","_col4"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_43] + Filter Operator [FIL_153] (rows=1 width=324) + predicate:(_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) + Merge Join Operator [MERGEJOIN_418] (rows=68 width=324) + Conds:RS_450._col0=RS_457._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 11 
[ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_457] + PartitionCols:_col0 + Group By Operator [GBY_456] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_97] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_405] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_42] + Group By Operator [GBY_96] (rows=69 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_416] (rows=120498 width=100) + Conds:RS_92._col0=RS_93._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_406] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_407] (rows=550076554 width=210) + Conds:RS_455._col1=RS_447._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_447] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_445] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_455] + PartitionCols:_col1 + Select Operator [SEL_454] (rows=550076554 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_453] (rows=550076554 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_93_date_dim_d_date_sk_min) AND DynamicValue(RS_93_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_93_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_50] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_452] + Group By Operator [GBY_451] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_315] (rows=2 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_406] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_450] + PartitionCols:_col0 + Group By Operator [GBY_449] (rows=68 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_402] (rows=316788826 width=135) - Conds:RS_457._col1=RS_420._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_420] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_457] - PartitionCols:_col1 - Select Operator [SEL_456] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_455] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_40_item_i_item_sk_min) AND DynamicValue(RS_40_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_40_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_454] - Group By Operator [GBY_453] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_271] - Group By Operator [GBY_270] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_269] (rows=80353 width=1119) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_405] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_433] - Group By Operator [GBY_432] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_429] - Group By Operator [GBY_426] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_421] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] + Group By Operator [GBY_46] (rows=68 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_415] (rows=62327 width=100) + Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_406] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_403] (rows=286549727 width=215) + Conds:RS_443._col1=RS_446._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_446] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_445] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] + PartitionCols:_col1 + Select Operator [SEL_442] (rows=286549727 width=119) + Output:["_col0","_col1","_col2"] + 
Filter Operator [FIL_441] (rows=286549727 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_440] + Group By Operator [GBY_439] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_267] + Group By Operator [GBY_266] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_265] (rows=2 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_406] diff --git a/ql/src/test/results/clientpositive/perf/tez/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out index c74c5323b9..76b4a5e25d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -95,197 +95,132 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 18 <- Reducer 14 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) -Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 19 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_234] - Limit [LIM_233] (rows=100 width=88) + File Output Operator [FS_210] + Limit [LIM_209] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_232] (rows=421657640 width=88) + Select Operator [SEL_208] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_59] - Select Operator [SEL_58] (rows=421657640 width=88) + Select Operator [SEL_58] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Merge Join Operator [MERGEJOIN_185] (rows=421657640 width=88) + Merge Join 
Operator [MERGEJOIN_185] (rows=1012347 width=1648) Conds:RS_55._col12, _col0=RS_56._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13","_col16","_col17","_col18","_col19","_col20","_col21"] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_48] (rows=383325119 width=88) + Select Operator [SEL_48] (rows=28847 width=776) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_184] (rows=383325119 width=88) - Conds:RS_45._col1=RS_221._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] - <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_221] + Merge Join Operator [MERGEJOIN_184] (rows=28847 width=776) + Conds:RS_45._col1=RS_207._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] PartitionCols:_col0 - Select Operator [SEL_220] (rows=1704 width=1910) + Select Operator [SEL_206] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_219] (rows=1704 width=1910) + Filter Operator [FIL_205] (rows=1704 width=104) predicate:(s_store_id is not null and s_store_sk is not null) - TableScan [TS_39] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] - <-Reducer 12 [SIMPLE_EDGE] + TableScan [TS_39] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_183] (rows=348477374 width=88) - Conds:RS_231._col0=RS_216._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + Merge Join Operator [MERGEJOIN_183] (rows=28847 width=676) + 
Conds:RS_204._col0=RS_199._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col1 - Select Operator [SEL_214] (rows=73049 width=1119) + Select Operator [SEL_197] (rows=317 width=8) Output:["_col1"] - Filter Operator [FIL_212] (rows=73049 width=1119) + Filter Operator [FIL_195] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) - TableScan [TS_15] (rows=73049 width=1119) - default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_week_seq"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] + TableScan [TS_15] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] PartitionCols:_col0 - Group By Operator [GBY_230] (rows=316797606 width=88) + Group By Operator [GBY_203] (rows=1196832 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 - <-Reducer 10 [SIMPLE_EDGE] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0, _col1 - Group By Operator [GBY_32] (rows=633595212 width=88) + Group By Operator [GBY_32] (rows=525329897 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_30] (rows=633595212 width=88) + Select Operator [SEL_30] (rows=525329897 width=205) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_182] (rows=633595212 width=88) - Conds:RS_229._col0=RS_190._col0(Inner),Output:["_col1","_col2","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_190] + Merge 
Join Operator [MERGEJOIN_179] (rows=525329897 width=205) + Conds:RS_188._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_188] PartitionCols:_col0 - Select Operator [SEL_187] (rows=73049 width=1119) + Select Operator [SEL_187] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_186] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] + Filter Operator [FIL_186] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_228] (rows=575995635 width=88) + Select Operator [SEL_190] (rows=73049 width=99) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_227] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_46_store_s_store_sk_min) AND DynamicValue(RS_46_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_46_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_21] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_218] - Group By Operator [GBY_217] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] - Group By Operator [GBY_193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_191] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_187] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_226] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - Group By Operator [GBY_223] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_220] + Filter Operator [FIL_189] (rows=73049 width=99) + predicate:(d_date_sk is not null and d_week_seq is not null) + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col12, _col0 - Merge Join Operator [MERGEJOIN_181] (rows=383325119 width=88) - Conds:RS_52._col1=RS_200._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_181] (rows=28847 width=976) + Conds:RS_52._col1=RS_202._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_202] PartitionCols:_col0 - Select 
Operator [SEL_199] (rows=1704 width=1910) + Select Operator [SEL_201] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_198] (rows=1704 width=1910) + Filter Operator [FIL_200] (rows=1704 width=192) predicate:(s_store_id is not null and s_store_sk is not null) - TableScan [TS_18] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] + TableScan [TS_18] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=348477374 width=88) - Conds:RS_210._col0=RS_215._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + Merge Join Operator [MERGEJOIN_180] (rows=28847 width=788) + Conds:RS_193._col0=RS_198._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] PartitionCols:_col1 - Select Operator [SEL_213] (rows=73049 width=1119) + Select Operator [SEL_196] (rows=317 width=8) Output:["_col1"] - Filter Operator [FIL_211] (rows=73049 width=1119) + Filter Operator [FIL_194] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) Please refer to the previous TableScan [TS_15] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_210] + SHUFFLE [RS_193] PartitionCols:_col0 - Group By Operator [GBY_209] (rows=316797606 width=88) + Group By Operator [GBY_192] (rows=1196832 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0, _col1 - Group 
By Operator [GBY_11] (rows=633595212 width=88) + Group By Operator [GBY_11] (rows=525329897 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=633595212 width=88) + Select Operator [SEL_9] (rows=525329897 width=205) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_179] (rows=633595212 width=88) - Conds:RS_208._col0=RS_188._col0(Inner),Output:["_col1","_col2","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_187] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_208] - PartitionCols:_col0 - Select Operator [SEL_207] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_206] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_205] - Group By Operator [GBY_204] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] - Group By Operator [GBY_202] (rows=1 
width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_201] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_199] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_197] - Group By Operator [GBY_196] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_194] - Group By Operator [GBY_192] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_189] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_187] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] diff --git a/ql/src/test/results/clientpositive/perf/tez/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out index a999e166c8..7de4229275 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Reducer 19' is a cross product +Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Reducer 15' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -64,209 +64,175 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 13 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) -Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 3 (BROADCAST_EDGE) +Map 16 <- Reducer 15 (BROADCAST_EDGE) +Map 6 <- Map 1 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 2 (SIMPLE_EDGE) +Reducer 4 <- Map 2 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_227] - Limit [LIM_226] (rows=100 width=88) + Reducer 10 vectorized + File Output Operator [FS_234] + Limit [LIM_233] (rows=1 width=94) Number of rows:100 - Select Operator [SEL_225] (rows=127775039 width=88) + Select Operator [SEL_232] (rows=1 width=94) Output:["_col0","_col1"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - Filter Operator [FIL_223] (rows=127775039 width=88) + 
<-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + Filter Operator [FIL_230] (rows=1 width=94) predicate:(_col1 >= 10L) - Group By Operator [GBY_222] (rows=383325119 width=88) + Group By Operator [GBY_229] (rows=1 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=766650239 width=88) + Group By Operator [GBY_68] (rows=1 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col9 - Merge Join Operator [MERGEJOIN_174] (rows=766650239 width=88) - Conds:RS_64._col4=RS_65._col0(Inner),Output:["_col9"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_65] + Merge Join Operator [MERGEJOIN_174] (rows=316 width=86) + Conds:RS_64._col4=RS_213._col0(Inner),Output:["_col9"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] PartitionCols:_col0 - Select Operator [SEL_54] (rows=169400 width=1436) + Select Operator [SEL_212] (rows=154000 width=227) Output:["_col0"] - Filter Operator [FIL_53] (rows=169400 width=1436) + Filter Operator [FIL_211] (rows=154000 width=227) predicate:(_col4 > (1.2 * CAST( _col0 AS decimal(16,6)))) - Merge Join Operator [MERGEJOIN_172] (rows=508200 width=1436) - Conds:RS_213._col1=RS_216._col2(Inner),Output:["_col0","_col3","_col4"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] - PartitionCols:_col2 - Select Operator [SEL_215] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_214] (rows=462000 width=1436) - predicate:(i_category is not null and i_item_sk is not null) - TableScan [TS_44] (rows=462000 width=1436) - default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] - <-Reducer 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + Map Join Operator [MAPJOIN_210] (rows=462000 width=227) + Conds:RS_207._col1=SEL_209._col2(Inner),HybridGraceHashJoin:true,Output:["_col0","_col3","_col4"] + <-Reducer 15 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_207] PartitionCols:_col1 - Map Join Operator [MAPJOIN_212] (rows=231000 width=1445) + Map Join Operator [MAPJOIN_206] (rows=10 width=202) Conds:(Inner),Output:["_col0","_col1"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_209] - Select Operator [SEL_208] (rows=1 width=8) - Filter Operator [FIL_207] (rows=1 width=8) + <-Reducer 5 [BROADCAST_EDGE] vectorized + BROADCAST [RS_203] + Select Operator [SEL_202] (rows=1 width=8) + Filter Operator [FIL_201] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_206] (rows=1 width=8) + Group By Operator [GBY_200] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_205] - Group By Operator [GBY_204] (rows=1 width=8) + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_199] + Group By Operator [GBY_198] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_203] (rows=9131 width=1119) - Group By Operator [GBY_202] (rows=9131 width=1119) + Select Operator [SEL_197] (rows=25 width=4) + Group By Operator [GBY_196] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 9 [SIMPLE_EDGE] vectorized + <-Map 2 [SIMPLE_EDGE] vectorized SHUFFLE [RS_186] PartitionCols:_col0 - Group By Operator [GBY_184] (rows=18262 width=1119) + Group By Operator [GBY_184] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_182] (rows=18262 width=1119) + Select Operator [SEL_182] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_180] (rows=18262 width=1119) + Filter Operator [FIL_180] (rows=50 width=12) predicate:((d_moy = 2) and (d_year = 2000)) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_211] (rows=231000 width=1436) + TableScan [TS_3] (rows=73049 width=12) + 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Select Operator [SEL_205] (rows=10 width=202) Output:["_col0","_col1"] - Group By Operator [GBY_210] (rows=231000 width=1436) + Group By Operator [GBY_204] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] PartitionCols:_col0 - Group By Operator [GBY_200] (rows=462000 width=1436) + Group By Operator [GBY_194] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category - Filter Operator [FIL_199] (rows=462000 width=1436) + Filter Operator [FIL_193] (rows=462000 width=201) predicate:i_category is not null - TableScan [TS_23] (rows=462000 width=1436) - default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] - <-Reducer 4 [SIMPLE_EDGE] + TableScan [TS_23] (rows=462000 width=201) + default@item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_current_price","i_category"] + <-Select Operator [SEL_209] (rows=462000 width=205) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_208] (rows=462000 width=205) + predicate:(i_category is not null and i_item_sk is not null) + TableScan [TS_44] (rows=462000 width=205) + default@item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_category"] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_173] (rows=696954748 width=88) - Conds:RS_61._col5=RS_62._col0(Inner),Output:["_col4","_col9"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_170] (rows=88000001 width=860) - Conds:RS_193._col1=RS_196._col0(Inner),Output:["_col0","_col3"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col1 - Select Operator [SEL_192] (rows=80000000 
width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_13] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] - PartitionCols:_col0 - Select Operator [SEL_195] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_194] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_61] + Merge Join Operator [MERGEJOIN_173] (rows=7192227 width=90) + Conds:RS_222._col5=RS_62._col0(Inner),Output:["_col4","_col9"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_169] (rows=633595212 width=88) - Conds:RS_58._col0=RS_221._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_58] + Map Join Operator [MAPJOIN_221] (rows=7192227 width=4) + Conds:RS_192._col0=SEL_220._col0(Inner),HybridGraceHashJoin:true,Output:["_col4","_col5"] + <-Map 1 [BROADCAST_EDGE] vectorized + BROADCAST [RS_192] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_168] (rows=80353 width=1119) - Conds:RS_178._col1=RS_188._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_178] - PartitionCols:_col1 - Select Operator [SEL_177] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_176] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_month_seq is not null) - TableScan [TS_0] (rows=73049 width=1119) - default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_188] + Map Join Operator [MAPJOIN_191] (rows=660 width=4) + 
Conds:SEL_190._col1=RS_188._col0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + <-Reducer 3 [BROADCAST_EDGE] vectorized + BROADCAST [RS_188] PartitionCols:_col0 - Group By Operator [GBY_187] (rows=9131 width=1119) + Group By Operator [GBY_187] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 9 [SIMPLE_EDGE] vectorized + <-Map 2 [SIMPLE_EDGE] vectorized SHUFFLE [RS_185] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=18262 width=1119) + Group By Operator [GBY_183] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_181] (rows=18262 width=1119) + Select Operator [SEL_181] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_179] (rows=18262 width=1119) + Filter Operator [FIL_179] (rows=50 width=12) predicate:((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) Please refer to the previous TableScan [TS_3] + <-Select Operator [SEL_190] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_189] (rows=73049 width=8) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Select Operator [SEL_220] (rows=525327388 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_219] (rows=525327388 width=11) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_65_i_i_item_sk_min) AND DynamicValue(RS_65_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_65_i_i_item_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_10] (rows=575995635 width=11) + default@store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_214] (rows=154000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_212] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_170] (rows=80000000 width=90) + Conds:RS_225._col1=RS_228._col0(Inner),Output:["_col0","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col1 + Select Operator [SEL_224] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_223] (rows=80000000 width=8) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_13] (rows=80000000 width=8) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_220] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_219] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_62_c_c_customer_sk_min) AND DynamicValue(RS_62_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_62_c_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_65_i_i_item_sk_min) AND DynamicValue(RS_65_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_65_i_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_58_d_d_date_sk_min) AND DynamicValue(RS_58_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_58_d_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - 
default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_198] - Group By Operator [GBY_197] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_128] (rows=88000001 width=860) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_170] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_218] - Group By Operator [GBY_217] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 20 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=169400 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_54] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_190] - Group By Operator [GBY_189] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_125] - Group By Operator [GBY_124] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_123] (rows=80353 width=1119) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_168] + Select Operator [SEL_227] 
(rows=40000000 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_226] (rows=40000000 width=90) + predicate:ca_address_sk is not null + TableScan [TS_16] (rows=40000000 width=90) + default@customer_address,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query60.q.out b/ql/src/test/results/clientpositive/perf/tez/query60.q.out index 8f534bd5af..f94101a4c7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query60.q.out @@ -204,100 +204,100 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_375] - Limit [LIM_374] (rows=100 width=108) + Limit [LIM_374] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_373] (rows=335408073 width=108) + Select Operator [SEL_373] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_372] - Group By Operator [GBY_371] (rows=335408073 width=108) + Group By Operator [GBY_371] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] vectorized Reduce Output Operator [RS_388] PartitionCols:_col0 - Group By Operator [GBY_387] (rows=670816147 width=108) + Group By Operator [GBY_387] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_386] (rows=670816147 width=108) + Top N Key Operator [TNK_386] (rows=5151 width=212) keys:_col0,sort order:+,top n:100 - Group By Operator [GBY_385] (rows=191657247 width=135) + Group By Operator [GBY_385] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=383314495 width=135) + Group By Operator [GBY_70] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator 
[MERGEJOIN_304] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_304] (rows=746132 width=100) Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=508200 width=1436) + Merge Join Operator [MERGEJOIN_294] (rows=34340 width=104) Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_323] PartitionCols:_col1 - Select Operator [SEL_322] (rows=462000 width=1436) + Select Operator [SEL_322] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=462000 width=1436) + Filter Operator [FIL_321] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) - TableScan [TS_0] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_329] PartitionCols:_col0 - Group By Operator [GBY_328] (rows=115500 width=1436) + Group By Operator [GBY_328] (rows=23100 width=100) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_327] PartitionCols:_col0 - Group By Operator [GBY_326] (rows=231000 width=1436) + Group By Operator [GBY_326] (rows=23100 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_325] (rows=231000 width=1436) + Select Operator [SEL_325] (rows=46200 width=190) Output:["i_item_id"] - Filter Operator [FIL_324] (rows=231000 width=1436) + Filter Operator [FIL_324] (rows=46200 width=190) predicate:((i_category = 'Children') and i_item_id is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"] + TableScan [TS_3] (rows=462000 width=190) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] <-Reducer 
23 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col4 - Select Operator [SEL_62] (rows=348467716 width=135) + Select Operator [SEL_62] (rows=1550375 width=13) Output:["_col4","_col5"] - Merge Join Operator [MERGEJOIN_299] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) Conds:RS_59._col1=RS_350._col0(Inner),Output:["_col2","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_350] PartitionCols:_col0 - Select Operator [SEL_347] (rows=20000000 width=1014) + Select Operator [SEL_347] (rows=8000000 width=116) Output:["_col0"] - Filter Operator [FIL_346] (rows=20000000 width=1014) + Filter Operator [FIL_346] (rows=8000000 width=112) predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] + TableScan [TS_16] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) Conds:RS_384._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_334] PartitionCols:_col0 - Select Operator [SEL_331] (rows=18262 width=1119) + Select Operator [SEL_331] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_330] (rows=18262 width=1119) + Filter Operator [FIL_330] (rows=50 width=12) predicate:((d_moy = 9) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 32 [SIMPLE_EDGE] vectorized 
SHUFFLE [RS_384] PartitionCols:_col0 - Select Operator [SEL_383] (rows=287989836 width=135) + Select Operator [SEL_383] (rows=285117733 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_382] (rows=287989836 width=135) + Filter Operator [FIL_382] (rows=285117733 width=123) predicate:((cs_bill_addr_sk BETWEEN DynamicValue(RS_60_customer_address_ca_address_sk_min) AND DynamicValue(RS_60_customer_address_ca_address_sk_max) and in_bloom_filter(cs_bill_addr_sk, DynamicValue(RS_60_customer_address_ca_address_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_66_item_i_item_sk_min) AND DynamicValue(RS_66_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_66_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_47] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + TableScan [TS_47] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_381] Group By Operator [GBY_380] (rows=1 width=12) @@ -306,7 +306,7 @@ Stage-0 SHUFFLE [RS_241] Group By Operator [GBY_240] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_239] (rows=508200 width=1436) + Select Operator [SEL_239] (rows=34340 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_294] <-Reducer 24 [BROADCAST_EDGE] vectorized @@ -317,35 +317,35 @@ Stage-0 
PARTITION_ONLY_SHUFFLE [RS_342] Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_335] (rows=18262 width=1119) + Select Operator [SEL_335] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_331] <-Reducer 30 [BROADCAST_EDGE] vectorized BROADCAST [RS_379] Group By Operator [GBY_378] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_358] Group By Operator [GBY_355] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_351] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_351] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_347] <-Reducer 13 [CONTAINS] vectorized Reduce Output Operator [RS_401] PartitionCols:_col0 - Group By Operator [GBY_400] (rows=670816147 width=108) + Group By Operator [GBY_400] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_399] (rows=670816147 width=108) + Top N Key Operator [TNK_399] (rows=5151 width=212) keys:_col0,sort order:+,top n:100 - Group By Operator [GBY_398] (rows=95833781 width=135) + Group By Operator [GBY_398] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=191667562 
width=135) + Group By Operator [GBY_108] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_305] (rows=379339 width=201) Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] @@ -354,9 +354,9 @@ Stage-0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col3 - Select Operator [SEL_100] (rows=174243235 width=135) + Select Operator [SEL_100] (rows=788222 width=110) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_302] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) Conds:RS_97._col2=RS_352._col0(Inner),Output:["_col1","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_352] @@ -365,7 +365,7 @@ Stage-0 <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_301] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) Conds:RS_397._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_336] @@ -374,12 +374,12 @@ Stage-0 <-Map 33 [SIMPLE_EDGE] vectorized SHUFFLE [RS_397] PartitionCols:_col0 - Select Operator [SEL_396] (rows=144002668 width=135) + Select Operator [SEL_396] (rows=143931246 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_395] (rows=144002668 width=135) + Filter Operator [FIL_395] (rows=143931246 width=123) predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_98_customer_address_ca_address_sk_min) AND DynamicValue(RS_98_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_98_customer_address_ca_address_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, 
DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_85] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + TableScan [TS_85] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized BROADCAST [RS_394] Group By Operator [GBY_393] (rows=1 width=12) @@ -388,7 +388,7 @@ Stage-0 SHUFFLE [RS_281] Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_279] (rows=508200 width=1436) + Select Operator [SEL_279] (rows=34340 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_294] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -399,35 +399,35 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_343] Group By Operator [GBY_340] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_337] (rows=18262 width=1119) + Select Operator [SEL_337] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_331] <-Reducer 31 [BROADCAST_EDGE] vectorized BROADCAST [RS_392] Group By Operator [GBY_391] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_359] Group By Operator [GBY_356] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_353] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_353] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_347] <-Reducer 4 [CONTAINS] vectorized Reduce Output Operator [RS_370] PartitionCols:_col0 - Group By Operator [GBY_369] (rows=670816147 width=108) + Group By Operator [GBY_369] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_368] (rows=670816147 width=108) + Top N Key Operator [TNK_368] (rows=5151 width=212) keys:_col0,sort order:+,top n:100 - Group By Operator [GBY_367] (rows=383325119 width=88) + Group By Operator [GBY_367] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=766650239 width=88) + Group By Operator [GBY_33] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_303] (rows=1384530 width=100) Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -436,9 +436,9 @@ Stage-0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Select Operator [SEL_25] (rows=696954748 width=88) + Select Operator [SEL_25] (rows=2876890 width=4) Output:["_col3","_col5"] - Merge Join Operator [MERGEJOIN_296] 
(rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) Conds:RS_22._col2=RS_348._col0(Inner),Output:["_col1","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_348] @@ -447,7 +447,7 @@ Stage-0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_295] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) Conds:RS_366._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_332] @@ -456,12 +456,12 @@ Stage-0 <-Map 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_366] PartitionCols:_col0 - Select Operator [SEL_365] (rows=575995635 width=88) + Select Operator [SEL_365] (rows=525327191 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_364] (rows=575995635 width=88) + Filter Operator [FIL_364] (rows=525327191 width=118) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_23_customer_address_ca_address_sk_min) AND DynamicValue(RS_23_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_23_customer_address_ca_address_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_item_i_item_sk_min) AND DynamicValue(RS_29_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + TableScan [TS_10] (rows=575995635 width=118) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_345] Group By Operator [GBY_344] (rows=1 width=12) @@ -470,18 +470,18 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_341] Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_333] (rows=18262 width=1119) + Select Operator [SEL_333] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_331] <-Reducer 29 [BROADCAST_EDGE] vectorized BROADCAST [RS_361] Group By Operator [GBY_360] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=8000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_357] Group By Operator [GBY_354] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_349] (rows=20000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=8000000)"] + Select Operator [SEL_349] (rows=8000000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_347] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -492,7 +492,7 @@ Stage-0 SHUFFLE [RS_201] Group By Operator [GBY_200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_199] (rows=508200 width=1436) + Select Operator [SEL_199] (rows=34340 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator 
[MERGEJOIN_294] diff --git a/ql/src/test/results/clientpositive/perf/tez/query61.q.out b/ql/src/test/results/clientpositive/perf/tez/query61.q.out index deb05d0e45..dc18d84d46 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query61.q.out @@ -104,15 +104,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Reducer 18 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 31 <- Reducer 10 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Map 12 <- Reducer 18 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 30 <- Reducer 10 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Map 29 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) @@ -122,7 +122,6 @@ Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) @@ 
-135,115 +134,115 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_339] - Limit [LIM_338] (rows=1 width=225) + File Output Operator [FS_334] + Limit [LIM_333] (rows=1 width=336) Number of rows:100 - Select Operator [SEL_337] (rows=1 width=225) + Select Operator [SEL_332] (rows=1 width=336) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_88] - Select Operator [SEL_87] (rows=1 width=225) + Select Operator [SEL_87] (rows=1 width=336) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_266] (rows=1 width=225) + Merge Join Operator [MERGEJOIN_266] (rows=1 width=224) Conds:(Inner),Output:["_col0","_col1"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_323] - Group By Operator [GBY_322] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_317] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_42] Group By Operator [GBY_41] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col9)"] - Merge Join Operator [MERGEJOIN_264] (rows=927646829 width=88) + Merge Join Operator [MERGEJOIN_264] (rows=505397 width=0) Conds:RS_37._col0=RS_38._col2(Inner),Output:["_col9"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_255] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_255] (rows=16000001 width=4) Conds:RS_269._col1=RS_272._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_269] PartitionCols:_col1 - Select Operator [SEL_268] (rows=80000000 width=860) + Select Operator [SEL_268] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_267] (rows=80000000 width=860) + Filter Operator [FIL_267] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - 
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_271] (rows=20000000 width=1014) + Select Operator [SEL_271] (rows=8000000 width=116) Output:["_col0"] - Filter Operator [FIL_270] (rows=20000000 width=1014) + Filter Operator [FIL_270] (rows=8000000 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] + TableScan [TS_3] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_259] (rows=843315281 width=88) - Conds:RS_30._col4=RS_311._col0(Inner),Output:["_col2","_col5"] + Merge Join Operator [MERGEJOIN_259] (rows=2526982 width=0) + Conds:RS_30._col4=RS_316._col0(Inner),Output:["_col2","_col5"] <-Map 29 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_311] + SHUFFLE [RS_316] PartitionCols:_col0 - Select Operator [SEL_310] (rows=2300 width=1179) + Select Operator [SEL_315] (rows=2300 width=259) Output:["_col0"] - Filter Operator [FIL_309] (rows=2300 width=1179) + Filter Operator [FIL_314] (rows=2300 width=259) predicate:(((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) - TableScan [TS_18] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_dmail","p_channel_email","p_channel_tv"] + TableScan [TS_18] (rows=2300 width=259) + 
default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_dmail","p_channel_email","p_channel_tv"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_258] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_258] (rows=2526982 width=0) Conds:RS_27._col3=RS_299._col0(Inner),Output:["_col2","_col4","_col5"] <-Map 26 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_299] PartitionCols:_col0 - Select Operator [SEL_298] (rows=852 width=1910) + Select Operator [SEL_298] (rows=341 width=116) Output:["_col0"] - Filter Operator [FIL_297] (rows=852 width=1910) + Filter Operator [FIL_297] (rows=341 width=115) predicate:((s_gmt_offset = -7) and s_store_sk is not null) - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_gmt_offset"] + TableScan [TS_15] (rows=1704 width=115) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_gmt_offset"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_257] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_257] (rows=12627499 width=0) Conds:RS_24._col1=RS_287._col0(Inner),Output:["_col2","_col3","_col4","_col5"] <-Map 23 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_287] PartitionCols:_col0 - Select Operator [SEL_286] (rows=231000 width=1436) + Select Operator [SEL_286] (rows=46200 width=99) Output:["_col0"] - Filter Operator [FIL_285] (rows=231000 width=1436) + Filter Operator [FIL_285] (rows=46200 width=94) predicate:((i_category = 'Electronics') and i_item_sk is not null) - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_category"] + TableScan [TS_12] (rows=462000 width=94) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_256] 
(rows=633595212 width=88) - Conds:RS_321._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_256] (rows=13119234 width=4) + Conds:RS_313._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 17 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_275] PartitionCols:_col0 - Select Operator [SEL_274] (rows=18262 width=1119) + Select Operator [SEL_274] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_273] (rows=18262 width=1119) + Filter Operator [FIL_273] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_313] PartitionCols:_col0 - Select Operator [SEL_320] (rows=575995635 width=88) + Select Operator [SEL_312] (rows=479120970 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_319] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_37_customer_c_customer_sk_min) AND DynamicValue(RS_37_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_37_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_31_promotion_p_promo_sk_min) AND DynamicValue(RS_31_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_31_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_28_store_s_store_sk_min) AND DynamicValue(RS_28_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_28_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_promo_sk","ss_ext_sales_price"] + Filter Operator [FIL_311] (rows=479120970 width=126) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_37_customer_c_customer_sk_min) AND DynamicValue(RS_37_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_37_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_28_store_s_store_sk_min) AND DynamicValue(RS_28_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_28_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_promo_sk","ss_ext_sales_price"] <-Reducer 18 [BROADCAST_EDGE] vectorized BROADCAST [RS_284] Group By Operator [GBY_283] (rows=1 width=12) @@ -252,7 +251,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE 
[RS_281] Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_276] (rows=18262 width=1119) + Select Operator [SEL_276] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_274] <-Reducer 24 [BROADCAST_EDGE] vectorized @@ -263,7 +262,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_293] Group By Operator [GBY_291] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_288] (rows=231000 width=1436) + Select Operator [SEL_288] (rows=46200 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_286] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -274,40 +273,29 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_305] Group By Operator [GBY_303] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_300] (rows=852 width=1910) + Select Operator [SEL_300] (rows=341 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_298] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_316] - Group By Operator [GBY_315] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_314] - Group By Operator [GBY_313] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_312] (rows=2300 width=1179) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_310] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] + BROADCAST [RS_310] + Group By Operator [GBY_309] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14591048)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_182] Group By Operator [GBY_181] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_180] (rows=88000001 width=860) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14591048)"] + Select Operator [SEL_180] (rows=16000001 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_255] <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_335] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_331] + Group By Operator [GBY_330] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_81] Group By Operator [GBY_80] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col8)"] - Merge Join Operator [MERGEJOIN_265] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_265] (rows=529208 width=0) Conds:RS_76._col0=RS_77._col2(Inner),Output:["_col8"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_76] @@ -316,7 +304,7 @@ Stage-0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_263] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_263] (rows=2646038 width=0) Conds:RS_69._col3=RS_301._col0(Inner),Output:["_col2","_col4"] <-Map 26 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_301] @@ -325,7 +313,7 @@ Stage-0 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_262] 
(rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_262] (rows=13222427 width=0) Conds:RS_66._col1=RS_289._col0(Inner),Output:["_col2","_col3","_col4"] <-Map 23 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_289] @@ -334,63 +322,63 @@ Stage-0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_261] (rows=633595212 width=88) - Conds:RS_334._col0=RS_277._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_261] (rows=13737330 width=4) + Conds:RS_329._col0=RS_277._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 17 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_277] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_274] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] PartitionCols:_col0 - Select Operator [SEL_333] (rows=575995635 width=88) + Select Operator [SEL_328] (rows=501694138 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_332] (rows=575995635 width=88) + Filter Operator [FIL_327] (rows=501694138 width=122) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_76_customer_c_customer_sk_min) AND DynamicValue(RS_76_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_76_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_67_item_i_item_sk_min) AND DynamicValue(RS_67_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_67_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_64_date_dim_d_date_sk_min) AND DynamicValue(RS_64_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_64_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_70_store_s_store_sk_min) AND DynamicValue(RS_70_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_70_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null 
and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_51] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] + TableScan [TS_51] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_331] - Group By Operator [GBY_330] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] + BROADCAST [RS_326] + Group By Operator [GBY_325] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14591048)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_237] Group By Operator [GBY_236] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_235] (rows=88000001 width=860) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14591048)"] + Select Operator [SEL_235] (rows=16000001 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_255] <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_325] - Group By Operator [GBY_324] (rows=1 width=12) + BROADCAST [RS_320] + Group By Operator [GBY_319] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_282] Group By Operator [GBY_280] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=18262 width=1119) + Select Operator [SEL_278] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_274] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) + BROADCAST [RS_322] + Group By Operator [GBY_321] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_294] Group By Operator [GBY_292] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_290] (rows=231000 width=1436) + Select Operator [SEL_290] (rows=46200 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_286] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) + BROADCAST [RS_324] + Group By Operator [GBY_323] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_306] Group By Operator [GBY_304] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_302] (rows=852 width=1910) + Select Operator [SEL_302] (rows=341 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_298] diff --git a/ql/src/test/results/clientpositive/perf/tez/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out index d05da86f49..6a6ffb7e3d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query63.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query63.q.out @@ -67,9 +67,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -82,78 +81,78 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_116] - Limit [LIM_115] (rows=100 width=88) + File Output Operator [FS_111] + Limit [LIM_110] (rows=71 width=228) Number of rows:100 - Select Operator [SEL_114] (rows=191662559 width=88) + Select Operator [SEL_109] (rows=71 width=228) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=191662559 width=88) + Select Operator [SEL_30] (rows=71 width=228) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_46] (rows=191662559 width=88) + Filter Operator [FIL_46] (rows=71 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_29] (rows=383325119 width=88) + Select Operator [SEL_29] (rows=143 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=383325119 width=88) + PTF Operator [PTF_28] (rows=143 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=383325119 width=88) + Select Operator [SEL_25] (rows=143 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=383325119 width=88) + Group By Operator [GBY_24] (rows=143 width=120) 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=766650239 width=88) + Group By Operator [GBY_22] (rows=143 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col11 - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col8","_col11"] + Merge Join Operator [MERGEJOIN_84] (rows=129200 width=8) + Conds:RS_18._col2=RS_106._col0(Inner),Output:["_col3","_col8","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_102] (rows=1704 width=1910) + Select Operator [SEL_105] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_101] (rows=1704 width=1910) + Filter Operator [FIL_104] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + TableScan [TS_9] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_83] (rows=129200 width=8) Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=73049 width=1119) + Select Operator [SEL_94] (rows=317 width=12) Output:["_col0","_col2"] - Filter Operator [FIL_93] (rows=73049 width=1119) + Filter Operator [FIL_93] (rows=317 width=12) predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_moy"] + TableScan 
[TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_moy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] + Merge Join Operator [MERGEJOIN_82] (rows=744232 width=4) + Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col8"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=462000 width=1436) + Select Operator [SEL_86] (rows=68 width=290) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=462000 width=1436) + Filter Operator [FIL_85] (rows=68 width=290) predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] + TableScan [TS_3] (rows=462000 width=289) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] <-Map 1 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_103] PartitionCols:_col1 - Select Operator [SEL_110] (rows=575995635 width=88) + Select Operator [SEL_102] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + Filter Operator [FIL_101] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_100] Group By Operator [GBY_99] (rows=1 width=12) @@ -162,20 +161,9 
@@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=73049 width=1119) + Select Operator [SEL_96] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_92] Group By Operator [GBY_91] (rows=1 width=12) @@ -184,7 +172,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=462000 width=1436) + Select Operator [SEL_88] (rows=68 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index 402dadefde..f670c4f4e2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,67 +265,50 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 40 <- Reducer 23 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) -Map 49 <- Reducer 43 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) -Map 72 <- Reducer 31 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) -Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) -Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE) +Map 44 <- Reducer 40 (BROADCAST_EDGE) +Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE) +Map 56 <- Reducer 42 (BROADCAST_EDGE) +Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 30 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 13 <- Map 69 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 17 <- Map 46 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 51 (ONE_TO_ONE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 61 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 37 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 
22 <- Map 64 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) -Reducer 25 <- Map 46 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 57 (ONE_TO_ONE_EDGE) -Reducer 28 <- Map 61 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 37 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 64 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 37 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) -Reducer 43 <- Map 42 (CUSTOM_SIMPLE_EDGE) -Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) -Reducer 45 <- Map 42 (CUSTOM_SIMPLE_EDGE) -Reducer 47 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 48 <- Map 46 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) -Reducer 51 <- Reducer 50 (SIMPLE_EDGE) -Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE) -Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 53 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) -Reducer 57 <- Reducer 56 (SIMPLE_EDGE) -Reducer 58 <- Reducer 57 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 69 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 60 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 62 <- Map 61 (CUSTOM_SIMPLE_EDGE) -Reducer 63 <- Map 61 (CUSTOM_SIMPLE_EDGE) -Reducer 65 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 66 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 
67 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 68 <- Map 64 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 70 <- Map 69 (CUSTOM_SIMPLE_EDGE) -Reducer 71 <- Map 69 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 69 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 12 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 14 <- Map 54 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) +Reducer 18 <- Map 43 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE) +Reducer 21 <- Map 52 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 36 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 53 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 16 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) +Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 50 (ONE_TO_ONE_EDGE) +Reducer 29 <- Map 52 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 36 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 53 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 38 <- Map 37 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) +Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 42 <- Map 39 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) +Reducer 46 <- Reducer 
45 (SIMPLE_EDGE) +Reducer 47 <- Reducer 46 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 5 <- Map 36 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (SIMPLE_EDGE) +Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 54 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 54 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 @@ -333,697 +316,501 @@ Stage-0 limit:-1 Stage-1 Reducer 11 vectorized - File Output Operator [FS_1283] - Select Operator [SEL_1282] (rows=273897192 width=88) + File Output Operator [FS_1201] + Select Operator [SEL_1200] (rows=2169965329 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_259] - Select Operator [SEL_258] (rows=273897192 width=88) + Select Operator [SEL_258] (rows=2169965329 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_257] (rows=273897192 width=88) + Filter Operator [FIL_257] (rows=2169965329 width=1694) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1087] (rows=821691577 width=88) - Conds:RS_1239._col2, _col1, _col3=RS_1281._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1281] - PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1280] (rows=746992327 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1279] (rows=746992327 width=88) + Merge Join 
Operator [MERGEJOIN_1087] (rows=6509895988 width=1694) + Conds:RS_1171._col2, _col1, _col3=RS_1199._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1171] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_1170] (rows=2299138 width=1354) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Group By Operator [GBY_1169] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_251] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_124] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_250] (rows=1493984654 width=88) + Group By Operator [GBY_123] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col49 - Select Operator [SEL_249] (rows=1493984654 width=88) + Select Operator [SEL_122] (rows=2331650 width=1292) 
Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] - Filter Operator [FIL_248] (rows=1493984654 width=88) + Filter Operator [FIL_121] (rows=2331650 width=1292) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1086] (rows=1493984654 width=88) - Conds:RS_245._col37=RS_1129._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] - <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1129] + Merge Join Operator [MERGEJOIN_1068] (rows=2331650 width=1292) + Conds:RS_118._col37=RS_1120._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1120] PartitionCols:_col0 - Select Operator [SEL_1125] (rows=1861800 width=385) + Select Operator [SEL_1119] (rows=1861800 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_1124] (rows=1861800 width=385) + Filter Operator [FIL_1118] (rows=1861800 width=89) predicate:cd_demo_sk is not null - TableScan [TS_97] (rows=1861800 width=385) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_245] + TableScan [TS_97] (rows=1861800 width=89) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_118] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1085] (rows=1358167838 width=88) - Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] + Merge Join Operator 
[MERGEJOIN_1067] (rows=2299138 width=1205) + Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_242] + SHUFFLE [RS_115] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1056] (rows=128840811 width=860) - Conds:RS_112._col1=RS_1128._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] - <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1128] + Merge Join Operator [MERGEJOIN_1056] (rows=70357394 width=458) + Conds:RS_112._col1=RS_1121._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1121] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1119] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1055] (rows=117128008 width=860) + Merge Join Operator [MERGEJOIN_1055] (rows=69376329 width=376) Conds:RS_109._col3=RS_1115._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] - <-Map 37 [SIMPLE_EDGE] vectorized + <-Map 36 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1115] PartitionCols:_col0 - Select Operator [SEL_1114] (rows=40000000 width=1014) + Select Operator [SEL_1114] (rows=40000000 width=365) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1113] (rows=40000000 width=1014) + Filter Operator [FIL_1113] (rows=40000000 width=365) predicate:ca_address_sk is not null - TableScan [TS_19] (rows=40000000 width=1014) - default@customer_address,ad2,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + TableScan [TS_19] (rows=40000000 width=365) + 
default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1054] (rows=106480005 width=860) + Merge Join Operator [MERGEJOIN_1054] (rows=69376329 width=19) Conds:RS_106._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] - <-Reducer 33 [SIMPLE_EDGE] + <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1053] (rows=7920 width=107) + Merge Join Operator [MERGEJOIN_1053] (rows=7200 width=4) Conds:RS_1109._col1=RS_1112._col0(Inner),Output:["_col0"] - <-Map 32 [SIMPLE_EDGE] vectorized + <-Map 33 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1109] PartitionCols:_col1 - Select Operator [SEL_1108] (rows=7200 width=107) + Select Operator [SEL_1108] (rows=7200 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_1107] (rows=7200 width=107) + Filter Operator [FIL_1107] (rows=7200 width=8) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,hd2,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 36 [SIMPLE_EDGE] vectorized + TableScan [TS_9] (rows=7200 width=8) + default@household_demographics,hd2,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Map 35 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1112] PartitionCols:_col0 - Select Operator [SEL_1111] (rows=20 width=12) + Select Operator [SEL_1111] (rows=20 width=4) Output:["_col0"] - Filter Operator [FIL_1110] (rows=20 width=12) + Filter Operator [FIL_1110] (rows=20 width=4) predicate:ib_income_band_sk is not null - TableScan [TS_12] (rows=20 width=12) - default@income_band,ib2,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk"] + TableScan [TS_12] (rows=20 width=4) + default@income_band,ib2,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk"] <-Reducer 3 [SIMPLE_EDGE] 
SHUFFLE [RS_106] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1052] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_1052] (rows=69376329 width=23) Conds:RS_103._col4=RS_1098._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1098] PartitionCols:_col0 - Select Operator [SEL_1094] (rows=73049 width=1119) + Select Operator [SEL_1094] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_1091] (rows=73049 width=1119) + Filter Operator [FIL_1091] (rows=73049 width=8) predicate:d_date_sk is not null - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1051] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_1051] (rows=69376329 width=23) Conds:RS_1090._col5=RS_1097._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized + <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_1097] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1094] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1090] PartitionCols:_col5 - Select Operator [SEL_1089] (rows=80000000 width=860) + Select Operator [SEL_1089] (rows=69376329 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1088] (rows=80000000 width=860) + Filter Operator [FIL_1088] (rows=69376329 width=23) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - 
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_243] + TableScan [TS_0] (rows=80000000 width=23) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_116] PartitionCols:_col16 - Select Operator [SEL_223] (rows=1234698008 width=88) + Select Operator [SEL_96] (rows=2651207 width=784) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1084] (rows=1234698008 width=88) - Conds:RS_220._col5, _col12=RS_1190._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 64 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1190] + Merge Join Operator [MERGEJOIN_1066] (rows=2651207 width=784) + Conds:RS_93._col5, _col12=RS_1167._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1167] PartitionCols:_col0, _col1 - Select Operator [SEL_1186] (rows=57591150 width=77) + Select Operator [SEL_1166] (rows=57591150 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_1185] (rows=57591150 width=77) + Filter Operator [FIL_1165] (rows=57591150 width=8) predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_75] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_220] + TableScan [TS_75] (rows=57591150 width=8) + 
default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_93] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1083] (rows=1122452711 width=88) - Conds:RS_217._col9=RS_1118._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1118] + Merge Join Operator [MERGEJOIN_1065] (rows=1608052 width=657) + Conds:RS_90._col9=RS_1116._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1116] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1114] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_217] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_90] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1082] (rows=1020411534 width=88) - Conds:RS_214._col10=RS_1220._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 61 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1220] + Merge Join Operator [MERGEJOIN_1064] (rows=1608052 width=296) + Conds:RS_87._col10=RS_1163._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1163] PartitionCols:_col0 - Select Operator [SEL_1217] (rows=1704 width=1910) + Select Operator [SEL_1162] (rows=1704 width=181) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1216] (rows=1704 width=1910) + Filter Operator [FIL_1161] (rows=1704 width=181) predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) - TableScan [TS_69] (rows=1704 width=1910) - 
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_214] + TableScan [TS_69] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_87] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1081] (rows=927646829 width=88) - Conds:RS_211._col5=RS_1262._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 57 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1262] + Merge Join Operator [MERGEJOIN_1063] (rows=1608052 width=119) + Conds:RS_84._col5=RS_1148._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 46 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1148] PartitionCols:_col0 - Select Operator [SEL_1261] (rows=52798137 width=135) + Select Operator [SEL_1147] (rows=13257 width=228) Output:["_col0"] - Filter Operator [FIL_1260] (rows=52798137 width=135) + Filter Operator [FIL_1146] (rows=13257 width=228) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1259] (rows=158394413 width=135) + Group By Operator [GBY_1145] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 56 [SIMPLE_EDGE] - SHUFFLE [RS_192] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col0 - Group By Operator [GBY_191] (rows=316788826 width=135) + Group By Operator [GBY_64] (rows=12806906 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_189] (rows=316788826 width=135) + Select Operator [SEL_62] (rows=183085709 width=450) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1079] (rows=316788826 width=135) - Conds:RS_1258._col0, _col1=RS_1170._col0, 
_col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1170] + Merge Join Operator [MERGEJOIN_1061] (rows=183085709 width=450) + Conds:RS_1140._col0, _col1=RS_1143._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1143] PartitionCols:_col0, _col1 - Select Operator [SEL_1166] (rows=28798881 width=106) + Select Operator [SEL_1142] (rows=28798881 width=337) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1165] (rows=28798881 width=106) + Filter Operator [FIL_1141] (rows=28798881 width=337) predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_56] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1258] + TableScan [TS_56] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1140] PartitionCols:_col0, _col1 - Select Operator [SEL_1257] (rows=287989836 width=135) + Select Operator [SEL_1139] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1256] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_187_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_187_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_187_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND 
DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_187_catalog_returns_cr_order_number_min) AND DynamicValue(RS_187_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_187_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_180] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1242] - Group By Operator [GBY_1240] (rows=1 width=12) + Filter Operator [FIL_1138] (rows=287989836 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_53] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 40 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1135] + Group By Operator [GBY_1133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1144] - Group By Operator [GBY_1142] (rows=1 width=12) + <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1131] + Group By Operator [GBY_1129] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1140] (rows=5703 width=1436) + Select Operator [SEL_1126] (rows=518 width=4) Output:["_col0"] - Select Operator [SEL_1136] 
(rows=5703 width=1436) + Select Operator [SEL_1124] (rows=518 width=312) Output:["_col0","_col3"] - Filter Operator [FIL_1135] (rows=5703 width=1436) + Filter Operator [FIL_1123] (rows=518 width=312) predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Reducer 67 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1255] - Group By Operator [GBY_1253] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1199] - Group By Operator [GBY_1195] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1191] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 59 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1250] - Group By Operator [GBY_1249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1179] - Group By Operator [GBY_1175] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1171] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 60 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1252] - Group By Operator [GBY_1251] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1180] - Group By Operator [GBY_1176] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1172] (rows=28798881 width=106) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_211] + TableScan [TS_34] (rows=462000 width=311) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_84] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1080] (rows=843315281 width=88) - Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_208] + Merge Join Operator [MERGEJOIN_1062] (rows=1608052 width=119) + Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_81] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_209] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_82] PartitionCols:_col5 - Select Operator [SEL_179] (rows=766650239 width=88) + Select Operator [SEL_52] (rows=1608052 width=119) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1078] (rows=766650239 width=88) - Conds:RS_176._col7=RS_1155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_1155] + Merge Join Operator [MERGEJOIN_1060] (rows=1608052 width=119) + Conds:RS_49._col7=RS_1159._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1159] PartitionCols:_col0 - Select Operator [SEL_1152] (rows=2300 width=1179) + Select Operator [SEL_1158] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_1151] (rows=2300 width=1179) + Filter Operator [FIL_1157] (rows=2300 width=4) predicate:p_promo_sk is not null - TableScan [TS_40] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_176] + TableScan [TS_40] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1077] (rows=696954748 width=88) - Conds:RS_173._col0=RS_1101._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1101] + Merge Join Operator [MERGEJOIN_1059] (rows=1608052 width=119) + Conds:RS_46._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1099] PartitionCols:_col0 - Select Operator [SEL_1096] (rows=36524 width=1119) + Select Operator [SEL_1095] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_1093] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) + Filter Operator [FIL_1092] (rows=652 width=8) + predicate:((d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_173] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_46] 
PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1076] (rows=633595212 width=88) - Conds:RS_1278._col1=RS_1139._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1139] + Merge Join Operator [MERGEJOIN_1058] (rows=4503592 width=119) + Conds:RS_1156._col1=RS_1125._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 39 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1125] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1136] - <-Map 72 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1278] + Please refer to the previous Select Operator [SEL_1124] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1156] PartitionCols:_col1 - Select Operator [SEL_1277] (rows=575995635 width=88) + Select Operator [SEL_1155] (rows=417313408 width=355) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1276] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_218_ad1_ca_address_sk_min) AND DynamicValue(RS_218_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_218_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_208_hd1_hd_demo_sk_min) AND DynamicValue(RS_208_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_208_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN 
DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_177_promotion_p_promo_sk_min) AND DynamicValue(RS_177_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_177_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_215_store_s_store_sk_min) AND DynamicValue(RS_215_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_215_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_221_store_returns_sr_ticket_number_min) AND DynamicValue(RS_221_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_221_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_158] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1148] - Group By Operator [GBY_1145] (rows=1 width=12) + Filter Operator [FIL_1154] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_85_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_85_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_85_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_47_d1_d_date_sk_min) AND DynamicValue(RS_47_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_47_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_31] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 40 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1134] + Please refer to the previous Group By Operator [GBY_1133] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1137] + Group By Operator [GBY_1136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 
[CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1143] - Group By Operator [GBY_1141] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1138] (rows=5703 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1136] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1241] - Please refer to the previous Group By Operator [GBY_1240] - <-Reducer 67 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1254] - Please refer to the previous Group By Operator [GBY_1253] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1244] - Group By Operator [GBY_1243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1106] - Group By Operator [GBY_1104] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1102] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1096] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1248] - Group By Operator [GBY_1247] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_909] - Group By Operator [GBY_908] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_907] (rows=7920 width=107) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 39 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1271] - Group By Operator [GBY_1270] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1123] - Group By Operator [GBY_1121] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1119] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 48 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1246] - Group By Operator [GBY_1245] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1160] - Group By Operator [GBY_1158] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1105] + Group By Operator [GBY_1103] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1156] (rows=2300 width=1179) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1152] - <-Reducer 58 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1267] - Group By Operator [GBY_1266] (rows=1 width=228) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1265] - Group By Operator [GBY_1264] (rows=1 width=228) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1263] (rows=52798137 width=135) + Select Operator [SEL_1100] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1261] - <-Reducer 63 [BROADCAST_EDGE] 
vectorized - BROADCAST [RS_1269] - Group By Operator [GBY_1268] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1095] + <-Reducer 47 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1153] + Group By Operator [GBY_1152] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1225] - Group By Operator [GBY_1223] (rows=1 width=12) + <-Reducer 46 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1151] + Group By Operator [GBY_1150] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1221] (rows=1704 width=1910) + Select Operator [SEL_1149] (rows=13257 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1217] - <-Reducer 68 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1273] - Group By Operator [GBY_1272] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1200] - Group By Operator [GBY_1196] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1192] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 71 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1275] - Group By Operator [GBY_1274] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1134] - Group By Operator [GBY_1132] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1130] (rows=1861800 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1125] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1239] - PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1238] (rows=746992327 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1237] (rows=746992327 width=88) + Please refer to the previous Select Operator [SEL_1147] + <-Reducer 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1199] + PartitionCols:_col1, _col0, _col2 + Select Operator [SEL_1198] (rows=2299138 width=525) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_1197] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_124] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_251] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_123] (rows=1493984654 width=88) + Group By Operator [GBY_250] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, 
_col25, _col26, _col49 - Select Operator [SEL_122] (rows=1493984654 width=88) + Select Operator [SEL_249] (rows=2331650 width=1292) Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] - Filter Operator [FIL_121] (rows=1493984654 width=88) + Filter Operator [FIL_248] (rows=2331650 width=1292) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1068] (rows=1493984654 width=88) - Conds:RS_118._col37=RS_1126._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] - <-Map 69 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1126] + Merge Join Operator [MERGEJOIN_1086] (rows=2331650 width=1292) + Conds:RS_245._col37=RS_1122._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1122] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1125] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_118] + Please refer to the previous Select Operator [SEL_1119] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_245] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1067] (rows=1358167838 width=88) - Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] + Merge Join Operator [MERGEJOIN_1085] (rows=2299138 width=1205) + Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE 
[RS_242] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_1056] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_116] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_243] PartitionCols:_col16 - Select Operator [SEL_96] (rows=1234698008 width=88) + Select Operator [SEL_223] (rows=2651207 width=784) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1066] (rows=1234698008 width=88) - Conds:RS_93._col5, _col12=RS_1187._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 64 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1187] + Merge Join Operator [MERGEJOIN_1084] (rows=2651207 width=784) + Conds:RS_220._col5, _col12=RS_1168._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1168] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_93] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_220] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1065] (rows=1122452711 width=88) - Conds:RS_90._col9=RS_1116._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1116] + Merge Join Operator [MERGEJOIN_1083] (rows=1608052 width=657) + Conds:RS_217._col9=RS_1117._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1117] PartitionCols:_col0 Please refer to the previous Select Operator 
[SEL_1114] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_90] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_217] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1064] (rows=1020411534 width=88) - Conds:RS_87._col10=RS_1218._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 61 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1218] + Merge Join Operator [MERGEJOIN_1082] (rows=1608052 width=296) + Conds:RS_214._col10=RS_1164._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1164] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1217] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_87] + Please refer to the previous Select Operator [SEL_1162] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_214] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1063] (rows=927646829 width=88) - Conds:RS_84._col5=RS_1210._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 51 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1210] + Merge Join Operator [MERGEJOIN_1081] (rows=1608052 width=119) + Conds:RS_211._col5=RS_1186._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 50 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1186] PartitionCols:_col0 - Select Operator [SEL_1209] (rows=52798137 width=135) + Select Operator [SEL_1185] (rows=13257 width=228) Output:["_col0"] - Filter Operator [FIL_1208] (rows=52798137 width=135) + Filter Operator [FIL_1184] (rows=13257 width=228) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1207] (rows=158394413 width=135) + Group By Operator [GBY_1183] (rows=39773 width=228) 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_65] + <-Reducer 49 [SIMPLE_EDGE] + SHUFFLE [RS_192] PartitionCols:_col0 - Group By Operator [GBY_64] (rows=316788826 width=135) + Group By Operator [GBY_191] (rows=12806906 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_62] (rows=316788826 width=135) + Select Operator [SEL_189] (rows=183085709 width=450) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1061] (rows=316788826 width=135) - Conds:RS_1206._col0, _col1=RS_1167._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1167] + Merge Join Operator [MERGEJOIN_1079] (rows=183085709 width=450) + Conds:RS_1182._col0, _col1=RS_1144._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1144] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1166] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1206] + Please refer to the previous Select Operator [SEL_1142] + <-Map 56 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1182] PartitionCols:_col0, _col1 - Select Operator [SEL_1205] (rows=287989836 width=135) + Select Operator [SEL_1181] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1204] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_60_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_60_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN 
DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_60_catalog_returns_cr_order_number_min) AND DynamicValue(RS_60_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_60_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_53] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1147] - Please refer to the previous Group By Operator [GBY_1145] - <-Reducer 65 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1203] - Group By Operator [GBY_1201] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1197] - Group By Operator [GBY_1193] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1188] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 54 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1182] - Group By Operator [GBY_1181] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1177] - Group By Operator [GBY_1173] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1168] (rows=28798881 width=106) - 
Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 55 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1184] - Group By Operator [GBY_1183] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1178] - Group By Operator [GBY_1174] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1169] (rows=28798881 width=106) + Filter Operator [FIL_1180] (rows=287989836 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_180] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1177] + Group By Operator [GBY_1175] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1132] + Group By Operator [GBY_1130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1128] (rows=518 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1166] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_84] + Please refer to the previous Select Operator [SEL_1124] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_211] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1062] (rows=843315281 width=88) - 
Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_81] + Merge Join Operator [MERGEJOIN_1080] (rows=1608052 width=119) + Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_208] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_82] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_209] PartitionCols:_col5 - Select Operator [SEL_52] (rows=766650239 width=88) + Select Operator [SEL_179] (rows=1608052 width=119) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1060] (rows=766650239 width=88) - Conds:RS_49._col7=RS_1153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1153] + Merge Join Operator [MERGEJOIN_1078] (rows=1608052 width=119) + Conds:RS_176._col7=RS_1160._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1160] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1152] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_49] + Please refer to the previous Select Operator [SEL_1158] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_176] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1059] (rows=696954748 width=88) - Conds:RS_46._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1099] + Merge Join Operator 
[MERGEJOIN_1077] (rows=1608052 width=119) + Conds:RS_173._col0=RS_1101._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1101] PartitionCols:_col0 - Select Operator [SEL_1095] (rows=36524 width=1119) + Select Operator [SEL_1096] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_1092] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) + Filter Operator [FIL_1093] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_46] + SHUFFLE [RS_173] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1058] (rows=633595212 width=88) - Conds:RS_1236._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] - <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1137] + Merge Join Operator [MERGEJOIN_1076] (rows=4503592 width=119) + Conds:RS_1196._col1=RS_1127._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 39 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1127] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1136] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1236] + Please refer to the previous Select Operator [SEL_1124] + <-Map 55 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1196] PartitionCols:_col1 - Select Operator [SEL_1235] (rows=575995635 width=88) + Select Operator [SEL_1195] (rows=417313408 width=355) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1234] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_91_ad1_ca_address_sk_min) AND 
DynamicValue(RS_91_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_91_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_81_hd1_hd_demo_sk_min) AND DynamicValue(RS_81_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_81_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_85_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_85_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_85_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_50_promotion_p_promo_sk_min) AND DynamicValue(RS_50_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_50_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_47_d1_d_date_sk_min) AND DynamicValue(RS_47_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_47_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_88_store_s_store_sk_min) AND DynamicValue(RS_88_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_88_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_94_store_returns_sr_ticket_number_min) AND DynamicValue(RS_94_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_94_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not 
null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_31] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1146] - Please refer to the previous Group By Operator [GBY_1145] - <-Reducer 65 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1202] - Please refer to the previous Group By Operator [GBY_1201] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1150] - Group By Operator [GBY_1149] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1105] - Group By Operator [GBY_1103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1100] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1095] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1164] - Group By Operator [GBY_1163] (rows=1 width=12) + Filter Operator [FIL_1194] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_254_item_i_item_sk_min) AND DynamicValue(RS_254_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_254_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_158] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1176] + Please refer to the previous Group By Operator [GBY_1175] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1193] + Group By Operator [GBY_1192] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_674] - Group By Operator [GBY_673] (rows=1 width=12) + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1174] + Group By Operator [GBY_1173] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_672] (rows=7920 width=107) + Select Operator [SEL_1172] (rows=2299138 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1229] 
- Group By Operator [GBY_1228] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1122] - Group By Operator [GBY_1120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1117] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1114] - <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1162] - Group By Operator [GBY_1161] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1170] + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1179] + Group By Operator [GBY_1178] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1159] - Group By Operator [GBY_1157] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1106] + Group By Operator [GBY_1104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1154] (rows=2300 width=1179) + Select Operator [SEL_1102] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1152] - <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1215] - Group By Operator [GBY_1214] (rows=1 width=228) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1213] - Group By Operator [GBY_1212] (rows=1 width=228) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1211] (rows=52798137 width=135) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1209] - <-Reducer 62 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1227] - Group By Operator [GBY_1226] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1096] + <-Reducer 51 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1191] + Group By Operator [GBY_1190] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1224] - Group By Operator [GBY_1222] (rows=1 width=12) + <-Reducer 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1189] + Group By Operator [GBY_1188] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1219] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1217] - <-Reducer 66 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1231] - Group By Operator [GBY_1230] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1198] - Group By Operator [GBY_1194] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1189] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1186] - <-Reducer 70 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1233] - Group By Operator [GBY_1232] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1133] - Group By Operator [GBY_1131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1127] (rows=1861800 width=385) + Select Operator [SEL_1187] (rows=13257 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1125] + Please refer to the previous Select Operator [SEL_1185] diff --git a/ql/src/test/results/clientpositive/perf/tez/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/query65.q.out index 3d030f9624..4c3644b16a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query65.q.out @@ -67,167 +67,158 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 14 <- Reducer 13 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 
(CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=100 width=88) + Reducer 7 vectorized + File Output Operator [FS_177] + Limit [LIM_176] (rows=100 width=705) Number of rows:100 - Select Operator [SEL_174] (rows=255550079 width=88) + Select Operator [SEL_175] (rows=65392 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_48] - Select Operator [SEL_47] (rows=255550079 width=88) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Select Operator [SEL_49] (rows=65392 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_129] (rows=255550079 width=88) - Conds:RS_44._col1=RS_153._col0(Inner),Output:["_col2","_col6","_col8","_col9","_col10","_col11"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + Merge Join Operator [MERGEJOIN_136] (rows=65392 width=704) + Conds:RS_46._col1=RS_174._col0(Inner),Output:["_col2","_col6","_col8","_col9","_col10","_col11"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_174] PartitionCols:_col0 - Select Operator [SEL_152] (rows=462000 width=1436) + Select Operator [SEL_173] (rows=462000 width=511) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_151] (rows=462000 width=1436) + Filter Operator [FIL_172] (rows=462000 width=511) predicate:i_item_sk is not null - TableScan [TS_36] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] + TableScan [TS_36] 
(rows=462000 width=511) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col1 - Filter Operator [FIL_43] (rows=232318249 width=88) - predicate:(_col2 <= (0.1 * _col4)) - Merge Join Operator [MERGEJOIN_128] (rows=696954748 width=88) - Conds:RS_163._col0=RS_173._col0(Inner),RS_163._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col4","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - PartitionCols:_col0 - Select Operator [SEL_143] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_142] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_33] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - PartitionCols:_col0 - Select Operator [SEL_172] (rows=158398803 width=88) - Output:["_col0","_col1"] - Group By Operator [GBY_171] (rows=158398803 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_170] (rows=316797606 width=88) - Output:["_col1","_col2"] - Group By Operator [GBY_169] (rows=316797606 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Group By Operator [GBY_24] (rows=633595212 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_127] (rows=633595212 width=88) - Conds:RS_168._col0=RS_134._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] - PartitionCols:_col0 - Select Operator [SEL_131] (rows=73049 width=1119) - Output:["_col0"] - Filter Operator [FIL_130] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 
1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] - PartitionCols:_col0 - Select Operator [SEL_167] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_166] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_14] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_150] - Group By Operator [GBY_148] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] - Group By Operator [GBY_146] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_145] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_143] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_165] - Group By Operator [GBY_164] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - Group By Operator [GBY_137] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_135] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Group By Operator [GBY_162] (rows=316797606 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=633595212 width=88) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_126] (rows=633595212 width=88) - Conds:RS_161._col0=RS_132._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_131] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] - PartitionCols:_col0 - Select Operator [SEL_160] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_159] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_45_item_i_item_sk_min) AND DynamicValue(RS_45_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_45_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_149] - Please refer to the previous Group By Operator [GBY_148] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - Group By Operator [GBY_155] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_154] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_152] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - Group By Operator [GBY_136] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] + Merge Join Operator [MERGEJOIN_135] (rows=65392 width=204) + Conds:RS_43._col0=RS_171._col0(Inner),Output:["_col1","_col2","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_171] + PartitionCols:_col0 + Select Operator [SEL_170] (rows=1704 width=92) + Output:["_col0","_col1"] + Filter Operator [FIL_169] (rows=1704 width=92) + predicate:s_store_sk is not null + TableScan [TS_33] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] + <-Reducer 4 
[ONE_TO_ONE_EDGE] + FORWARD [RS_43] + PartitionCols:_col0 + Filter Operator [FIL_42] (rows=65392 width=231) + predicate:(_col2 <= (0.1 * _col4)) + Merge Join Operator [MERGEJOIN_134] (rows=196176 width=231) + Conds:RS_153._col0=RS_168._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_153] + PartitionCols:_col0 + Group By Operator [GBY_152] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_132] (rows=91197860 width=89) + Conds:RS_151._col0=RS_139._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Select Operator [SEL_138] (rows=317 width=8) + Output:["_col0"] + Filter Operator [FIL_137] (rows=317 width=8) + predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_149] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 10 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_148] + Group By Operator [GBY_147] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + Group By Operator [GBY_143] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_140] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_138] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] + PartitionCols:_col0 + Select Operator [SEL_167] (rows=17 width=115) + Output:["_col0","_col1"] + Group By Operator [GBY_166] (rows=17 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_165] (rows=184637 width=118) + Output:["_col1","_col2"] + Group By Operator [GBY_164] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_133] (rows=91197860 width=89) + Conds:RS_163._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_138] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Select Operator [SEL_162] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_161] (rows=525329897 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_39_store_sales_ss_store_sk_min) AND DynamicValue(RS_39_store_sales_ss_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_39_store_sales_ss_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_14] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + Group By Operator [GBY_144] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_142] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_138] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_154] (rows=184637 width=2) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_152] diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out index 8fe44626f9..225b62f7e2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -457,10 +457,10 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) -Map 27 <- Reducer 17 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Map 25 <- Reducer 17 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Map 21 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Map 24 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) @@ -471,8 +471,6 @@ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 24 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) @@ -485,170 +483,159 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_278] - Select Operator [SEL_277] (rows=100 width=135) + File Output Operator [FS_270] + Select Operator [SEL_269] (rows=100 width=4614) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] - Limit [LIM_276] (rows=100 width=135) + Limit [LIM_268] (rows=100 width=4510) Number of rows:100 - Select Operator [SEL_275] (rows=158120068 width=135) + Select Operator [SEL_267] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_274] - Group By Operator [GBY_273] (rows=158120068 width=135) + SHUFFLE [RS_266] + Group By Operator [GBY_265] (rows=2423925 width=4510) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 7 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_294] + Reduce Output Operator [RS_284] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_293] (rows=316240137 width=135) + Group By Operator [GBY_283] (rows=2513727 width=4510) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_292] (rows=316240137 width=135) + Top N Key Operator [TNK_282] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 - Select Operator [SEL_291] (rows=316240137 width=135) + Select Operator [SEL_281] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_290] (rows=210822976 width=135) + Group By Operator [GBY_280] (rows=2513700 width=3166) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_62] (rows=421645953 width=135) + Group By Operator [GBY_62] (rows=5559759 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_60] (rows=421645953 width=135) + Select Operator [SEL_60] (rows=5559759 width=680) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_204] (rows=421645953 width=135) - Conds:RS_57._col3=RS_257._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] + Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=680) + Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] + SHUFFLE [RS_259] PartitionCols:_col0 - Select Operator [SEL_254] (rows=27 width=1029) + Select Operator [SEL_257] (rows=27 width=482) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_253] (rows=27 width=1029) + Filter Operator [FIL_256] (rows=27 width=482) predicate:w_warehouse_sk is not null - TableScan [TS_12] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] + TableScan [TS_12] (rows=27 width=482) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=205) Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_242] (rows=1 width=0) + Select Operator [SEL_242] (rows=1 width=88) Output:["_col0"] - Filter Operator [FIL_241] (rows=1 width=0) + Filter Operator [FIL_241] (rows=1 
width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) - TableScan [TS_9] (rows=1 width=0) - default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_carrier"] + TableScan [TS_9] (rows=1 width=88) + default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_202] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_202] (rows=11119518 width=224) Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_230] (rows=36524 width=1119) + Select Operator [SEL_230] (rows=652 width=12) Output:["_col0","_col2"] - Filter Operator [FIL_229] (rows=36524 width=1119) + Filter Operator [FIL_229] (rows=652 width=12) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_201] (rows=316788826 width=135) - Conds:RS_289._col1=RS_221._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_201] (rows=31363607 width=234) + Conds:RS_279._col1=RS_221._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_218] (rows=9600 width=471) + Select Operator [SEL_218] (rows=9600 width=8) Output:["_col0"] - Filter Operator [FIL_217] (rows=9600 width=471) + Filter Operator [FIL_217] (rows=9600 width=8) predicate:(t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) - 
TableScan [TS_3] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_time"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + TableScan [TS_3] (rows=86400 width=8) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_279] PartitionCols:_col1 - Select Operator [SEL_288] (rows=287989836 width=135) + Select Operator [SEL_278] (rows=282272460 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_287] (rows=287989836 width=135) - predicate:((cs_ship_mode_sk BETWEEN DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and (cs_sold_time_sk BETWEEN DynamicValue(RS_49_time_dim_t_time_sk_min) AND DynamicValue(RS_49_time_dim_t_time_sk_max) and in_bloom_filter(cs_sold_time_sk, DynamicValue(RS_49_time_dim_t_time_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_58_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_58_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_58_warehouse_w_warehouse_sk_bloom_filter))) and cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) - TableScan [TS_33] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + Filter Operator [FIL_277] (rows=282272460 width=243) + predicate:((cs_ship_mode_sk BETWEEN 
DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and (cs_sold_time_sk BETWEEN DynamicValue(RS_49_time_dim_t_time_sk_min) AND DynamicValue(RS_49_time_dim_t_time_sk_max) and in_bloom_filter(cs_sold_time_sk, DynamicValue(RS_49_time_dim_t_time_sk_bloom_filter))) and cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_33] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_280] - Group By Operator [GBY_279] (rows=1 width=12) + BROADCAST [RS_272] + Group By Operator [GBY_271] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_226] Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=9600 width=471) + Select Operator [SEL_222] (rows=9600 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_218] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_282] - Group By Operator [GBY_281] (rows=1 width=12) + BROADCAST [RS_274] + Group By Operator [GBY_273] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_238] Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_234] (rows=36524 width=1119) + Select Operator [SEL_234] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_230] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_284] - Group By Operator [GBY_283] (rows=1 width=12) + BROADCAST [RS_276] + Group By Operator [GBY_275] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_250] Group By Operator [GBY_248] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_246] (rows=1 width=0) + Select Operator [SEL_246] (rows=1 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_242] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_286] - Group By Operator [GBY_285] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_262] - Group By Operator [GBY_260] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_258] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_254] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_272] + Reduce Output Operator [RS_264] PartitionCols:_col0, _col1, _col2, _col3, 
_col4, _col5 - Group By Operator [GBY_271] (rows=316240137 width=135) + Group By Operator [GBY_263] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_270] (rows=316240137 width=135) + Top N Key Operator [TNK_262] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 - Select Operator [SEL_269] (rows=316240137 width=135) + Select Operator [SEL_261] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_268] (rows=105417161 width=135) + Group By Operator [GBY_260] (rows=27 width=3166) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_29] (rows=210834322 width=135) + Group By Operator [GBY_29] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_27] (rows=210834322 width=135) + Select Operator [SEL_27] (rows=2853684 width=707) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_200] (rows=210834322 width=135) - Conds:RS_24._col3=RS_255._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] + Merge Join Operator [MERGEJOIN_200] (rows=2853684 width=707) + Conds:RS_24._col3=RS_258._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] + SHUFFLE [RS_258] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_254] + Please refer to the previous Select Operator [SEL_257] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_199] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_199] (rows=2853684 width=233) Conds:RS_21._col2=RS_243._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_243] @@ -657,7 +644,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_198] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_198] (rows=5707369 width=238) Conds:RS_18._col0=RS_231._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_231] @@ -666,21 +653,21 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_197] (rows=158402938 width=135) - Conds:RS_267._col1=RS_219._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_197] (rows=15984351 width=239) + Conds:RS_255._col1=RS_219._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] <-Map 10 
[SIMPLE_EDGE] vectorized SHUFFLE [RS_219] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_218] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_267] + SHUFFLE [RS_255] PartitionCols:_col1 - Select Operator [SEL_266] (rows=144002668 width=135) + Select Operator [SEL_254] (rows=143859154 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_265] (rows=144002668 width=135) - predicate:((ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and (ws_warehouse_sk BETWEEN DynamicValue(RS_25_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_25_warehouse_w_warehouse_sk_max) and in_bloom_filter(ws_warehouse_sk, DynamicValue(RS_25_warehouse_w_warehouse_sk_bloom_filter))) and ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] + Filter Operator [FIL_253] (rows=143859154 width=243) + predicate:((ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) and 
(ws_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_228] Group By Operator [GBY_227] (rows=1 width=12) @@ -689,7 +676,7 @@ Stage-0 SHUFFLE [RS_225] Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=9600 width=471) + Select Operator [SEL_220] (rows=9600 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_218] <-Reducer 19 [BROADCAST_EDGE] vectorized @@ -700,7 +687,7 @@ Stage-0 SHUFFLE [RS_237] Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_232] (rows=36524 width=1119) + Select Operator [SEL_232] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_230] <-Reducer 22 [BROADCAST_EDGE] vectorized @@ -711,18 +698,7 @@ Stage-0 SHUFFLE [RS_249] Group By Operator [GBY_247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_244] (rows=1 width=0) + 
Select Operator [SEL_244] (rows=1 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_242] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_264] - Group By Operator [GBY_263] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_261] - Group By Operator [GBY_259] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_256] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_254] diff --git a/ql/src/test/results/clientpositive/perf/tez/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/query67.q.out index 303ed224bc..b290331165 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query67.q.out @@ -97,12 +97,10 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -113,105 +111,83 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_122] - Limit [LIM_121] (rows=100 width=88) + File Output Operator [FS_112] + Limit [LIM_111] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_120] (rows=1149975358 width=88) + Select Operator [SEL_110] (rows=273593580 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - Select Operator [SEL_118] (rows=1149975358 width=88) + SHUFFLE [RS_109] + Select Operator [SEL_108] (rows=273593580 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_117] (rows=1149975358 width=88) + Filter Operator [FIL_107] (rows=273593580 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_116] (rows=3449926075 width=88) + PTF Operator [PTF_106] (rows=820780740 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_115] (rows=3449926075 width=88) + Select Operator [SEL_105] (rows=820780740 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_113] (rows=3449926075 
width=88) + Select Operator [SEL_103] (rows=820780740 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_112] (rows=3449926075 width=88) + Group By Operator [GBY_102] (rows=820780740 width=621) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_23] (rows=6899852151 width=88) + Group By Operator [GBY_23] (rows=820780740 width=621) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col8)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 0L - Select Operator [SEL_21] (rows=766650239 width=88) + Select Operator [SEL_21] (rows=91197860 width=586) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col1=RS_103._col0(Inner),Output:["_col3","_col4","_col7","_col8","_col9","_col11","_col13","_col14","_col15","_col16"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + Merge Join Operator [MERGEJOIN_84] (rows=91197860 width=586) + Conds:RS_18._col1=RS_101._col0(Inner),Output:["_col3","_col4","_col7","_col8","_col9","_col11","_col13","_col14","_col15","_col16"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_102] (rows=462000 width=1436) + Select Operator [SEL_100] (rows=462000 width=393) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_101] (rows=462000 width=1436) + Filter Operator [FIL_99] (rows=462000 width=393) predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=1436) - 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] + TableScan [TS_9] (rows=462000 width=393) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) - Conds:RS_15._col2=RS_95._col0(Inner),Output:["_col1","_col3","_col4","_col7","_col8","_col9","_col11"] + Merge Join Operator [MERGEJOIN_83] (rows=91197860 width=201) + Conds:RS_15._col2=RS_98._col0(Inner),Output:["_col1","_col3","_col4","_col7","_col8","_col9","_col11"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] + SHUFFLE [RS_98] PartitionCols:_col0 - Select Operator [SEL_94] (rows=1704 width=1910) + Select Operator [SEL_97] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_93] (rows=1704 width=1910) + Filter Operator [FIL_96] (rows=1704 width=104) predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] + TableScan [TS_6] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_82] (rows=91197860 width=104) + Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=73049 width=1119) + Select Operator [SEL_86] (rows=317 width=20) Output:["_col0","_col2","_col3","_col4"] - Filter Operator [FIL_85] (rows=73049 width=1119) + Filter Operator [FIL_85] (rows=317 width=20) predicate:(d_date_sk 
is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] + TableScan [TS_3] (rows=73049 width=20) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_110] (rows=575995635 width=88) + Select Operator [SEL_94] (rows=525329897 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_16_store_s_store_sk_min) AND DynamicValue(RS_16_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_16_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_100] - Group By Operator [GBY_99] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - Group By Operator [GBY_97] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_94] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Filter Operator [FIL_93] (rows=525329897 width=122) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_92] Group By Operator [GBY_91] (rows=1 width=12) @@ -220,7 +196,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=73049 width=1119) + Select Operator [SEL_88] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out index 003188c1c4..7c94381d40 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query68.q.out @@ -97,184 +97,160 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Map 8 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_192] - Limit [LIM_191] (rows=100 width=88) + File Output Operator [FS_185] + Limit [LIM_184] (rows=100 width=706) Number of rows:100 - Select Operator [SEL_190] (rows=463823414 width=88) + Select Operator [SEL_183] (rows=4418634 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 
[SIMPLE_EDGE] SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=463823414 width=88) + Select Operator [SEL_45] (rows=4418634 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_44] (rows=463823414 width=88) + Filter Operator [FIL_44] (rows=4418634 width=706) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_145] (rows=463823414 width=88) - Conds:RS_41._col0=RS_189._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_145] (rows=4418634 width=706) + Conds:RS_41._col0=RS_182._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_41] + SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_140] (rows=80000000 width=277) Conds:RS_148._col1=RS_151._col0(Inner),Output:["_col0","_col2","_col3","_col5"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_151] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_150] (rows=40000000 width=1014) + Select Operator [SEL_150] (rows=40000000 width=97) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=40000000 width=1014) + Filter Operator [FIL_149] (rows=40000000 width=97) predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,current_addr,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_city"] + TableScan [TS_3] (rows=40000000 width=97) + default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_148] PartitionCols:_col1 - Select Operator [SEL_147] (rows=80000000 width=860) + Select Operator [SEL_147] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_146] (rows=80000000 width=860) + Filter 
Operator [FIL_146] (rows=80000000 width=188) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] + TableScan [TS_0] (rows=80000000 width=188) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_182] PartitionCols:_col1 - Select Operator [SEL_188] (rows=421657640 width=88) + Select Operator [SEL_181] (rows=4418634 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_187] (rows=421657640 width=88) + Group By Operator [GBY_180] (rows=4418634 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 7 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=843315281 width=88) + Group By Operator [GBY_34] (rows=4418634 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col18, _col3, _col5 - Merge Join Operator [MERGEJOIN_144] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_144] (rows=4418634 width=97) Conds:RS_30._col3=RS_152._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col18"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_152] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_150] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_143] 
(rows=766650239 width=88) - Conds:RS_27._col2=RS_174._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_174] + Merge Join Operator [MERGEJOIN_143] (rows=4418634 width=4) + Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_173] (rows=7200 width=107) + Select Operator [SEL_170] (rows=1855 width=12) Output:["_col0"] - Filter Operator [FIL_172] (rows=7200 width=107) + Filter Operator [FIL_169] (rows=1855 width=12) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 12 [SIMPLE_EDGE] + TableScan [TS_15] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=696954748 width=88) - Conds:RS_24._col4=RS_166._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_166] + Merge Join Operator [MERGEJOIN_142] (rows=17150490 width=4) + Conds:RS_24._col4=RS_163._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_165] (rows=1704 width=1910) + Select Operator [SEL_162] (rows=85 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=1704 width=1910) + Filter Operator [FIL_161] (rows=85 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) - TableScan [TS_12] 
(rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_city"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_12] (rows=1704 width=97) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_141] (rows=633595212 width=88) - Conds:RS_186._col0=RS_158._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 14 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] + Merge Join Operator [MERGEJOIN_141] (rows=42598570 width=185) + Conds:RS_179._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] PartitionCols:_col0 - Select Operator [SEL_157] (rows=73049 width=1119) + Select Operator [SEL_154] (rows=170 width=12) Output:["_col0"] - Filter Operator [FIL_156] (rows=73049 width=1119) + Filter Operator [FIL_153] (rows=170 width=12) predicate:((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + TableScan [TS_9] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_179] PartitionCols:_col0 - Select Operator [SEL_185] (rows=575995635 width=88) + Select Operator [SEL_178] (rows=457565061 width=343) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_184] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_31_customer_address_ca_address_sk_min) AND DynamicValue(RS_31_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, 
DynamicValue(RS_31_customer_address_ca_address_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_41_customer_c_customer_sk_min) AND DynamicValue(RS_41_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_41_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_28_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_28_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_28_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] + Filter Operator [FIL_177] (rows=457565061 width=343) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_28_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_28_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_28_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and 
in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=343) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_156] (rows=170 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_154] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_163] - Group By Operator [GBY_162] (rows=1 width=12) + BROADCAST [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_159] (rows=73049 width=1119) + Select Operator [SEL_164] (rows=85 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_157] + Please refer to the previous Select Operator [SEL_162] <-Reducer 17 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_171] - Group By Operator [GBY_170] (rows=1 width=12) + BROADCAST [RS_176] + Group By Operator [GBY_175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_169] - Group By Operator [GBY_168] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_167] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_165] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_179] - Group By Operator [GBY_178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_177] - Group By Operator [GBY_176] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_174] + Group By Operator [GBY_173] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_175] (rows=7200 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_173] - <-Reducer 5 [BROADCAST_EDGE] vectorized - BROADCAST [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_117] - Group By Operator [GBY_116] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] - Select Operator [SEL_115] (rows=88000001 width=860) - Output:["_col0"] - Please refer to the previous Merge Join 
Operator [MERGEJOIN_140] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_181] - Group By Operator [GBY_180] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] - Group By Operator [GBY_154] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_153] (rows=40000000 width=1014) + Select Operator [SEL_172] (rows=1855 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_150] + Please refer to the previous Select Operator [SEL_170] diff --git a/ql/src/test/results/clientpositive/perf/tez/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out index b8e9674bdd..65601d745c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -109,223 +109,253 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 11 <- Reducer 14 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 21 <- Reducer 17 (BROADCAST_EDGE) -Map 22 <- Reducer 20 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 13 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - 
File Output Operator [FS_233] - Limit [LIM_232] (rows=100 width=88) + Reducer 8 vectorized + File Output Operator [FS_240] + Limit [LIM_239] (rows=1 width=383) Number of rows:100 - Select Operator [SEL_231] (rows=191662559 width=88) + Select Operator [SEL_238] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] - Select Operator [SEL_229] (rows=191662559 width=88) + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + Select Operator [SEL_236] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_228] (rows=191662559 width=88) + Group By Operator [GBY_235] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_67] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_66] (rows=383325119 width=88) + Group By Operator [GBY_68] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_105] (rows=383325119 width=88) + Top N Key Operator [TNK_105] (rows=1 width=363) keys:_col6, _col7, _col8, _col9, _col10,sort order:+++++,top n:100 - Select Operator [SEL_65] (rows=383325119 width=88) + Select Operator [SEL_67] (rows=1 width=363) Output:["_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_64] (rows=383325119 width=88) + Filter Operator [FIL_66] (rows=1 width=363) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_181] (rows=766650239 width=88) - Conds:RS_61._col0=RS_227._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] - <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_227] + Merge Join Operator [MERGEJOIN_184] (rows=1 width=363) + 
Conds:RS_63._col0=RS_234._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_226] (rows=158394413 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_225] (rows=158394413 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Group By Operator [GBY_57] (rows=316788826 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=316788826 width=135) - Conds:RS_224._col0=RS_197._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_197] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_191] (rows=36524 width=1119) - predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - PartitionCols:_col0 - Select Operator [SEL_223] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_222] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_47] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] - Group By Operator [GBY_220] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 
[CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_204] - Group By Operator [GBY_201] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_192] - <-Reducer 4 [ONE_TO_ONE_EDGE] - FORWARD [RS_61] - PartitionCols:_col0 - Select Operator [SEL_46] (rows=696954748 width=88) + Select Operator [SEL_48] (rows=1 width=367) Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_45] (rows=696954748 width=88) + Filter Operator [FIL_47] (rows=1 width=367) predicate:_col12 is null - Merge Join Operator [MERGEJOIN_180] (rows=1393909496 width=88) - Conds:RS_41._col0=RS_42._col0(Left Semi),RS_41._col0=RS_219._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] - <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_183] (rows=33 width=367) + Conds:RS_44._col0=RS_224._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_176] (rows=96800003 width=860) - Conds:RS_36._col1=RS_190._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] + Merge Join Operator [MERGEJOIN_182] (rows=6841 width=363) + Conds:RS_41._col0=RS_42._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col0 - Select Operator [SEL_189] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_188] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=88000001 width=860) - Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] - PartitionCols:_col2 - Select Operator [SEL_183] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_182] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] - PartitionCols:_col0 - Select Operator [SEL_186] (rows=40000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_185] (rows=40000000 width=1014) - predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Group By Operator [GBY_40] (rows=633595212 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_177] (rows=633595212 width=88) - Conds:RS_211._col0=RS_193._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_178] (rows=4605476 width=363) + Conds:RS_36._col1=RS_193._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_192] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_211] 
+ SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_210] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_209] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_41_c_c_customer_sk_min) AND DynamicValue(RS_41_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_41_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_199] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_192] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_208] - Group By Operator [GBY_207] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_138] - Group By Operator [GBY_137] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] - Select Operator [SEL_136] (rows=96800003 width=860) - Output:["_col0"] - 
Please refer to the previous Merge Join Operator [MERGEJOIN_176] - <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_219] + Select Operator [SEL_192] (rows=1861800 width=363) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_191] (rows=1861800 width=363) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=363) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=4541258 width=5) + Conds:RS_187._col2=RS_190._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col2 + Select Operator [SEL_186] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_185] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=2352941 width=90) + Output:["_col0"] + Filter Operator [FIL_188] (rows=2352941 width=90) + predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_40] (rows=116289 width=1) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=43153353 width=1) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_179] (rows=43153353 width=1) + 
Conds:RS_214._col0=RS_196._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_196] + PartitionCols:_col0 + Select Operator [SEL_195] (rows=150 width=12) + Output:["_col0"] + Filter Operator [FIL_194] (rows=150 width=12) + predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_212] (rows=525327388 width=7) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_41_c_c_customer_sk_min) AND DynamicValue(RS_41_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_41_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4291485)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_138] + Group By Operator [GBY_137] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4291485)"] + Select Operator [SEL_136] (rows=4605476 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_178] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_209] + 
Group By Operator [GBY_208] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_197] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_195] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_224] PartitionCols:_col0 - Select Operator [SEL_218] (rows=79201469 width=135) + Select Operator [SEL_223] (rows=116289 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_217] (rows=79201469 width=135) + Group By Operator [GBY_222] (rows=116289 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_29] (rows=158402938 width=135) + Group By Operator [GBY_29] (rows=116289 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=158402938 width=135) - Conds:RS_216._col0=RS_195._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] + Merge Join Operator [MERGEJOIN_180] (rows=11823304 width=3) + Conds:RS_221._col0=RS_198._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_192] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + Please refer to the previous Select Operator [SEL_195] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_215] (rows=144002668 width=135) + Select Operator [SEL_220] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=144002668 width=135) - 
predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + Filter Operator [FIL_219] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_44_c_c_customer_sk_min) AND DynamicValue(RS_44_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_44_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_203] - Group By Operator [GBY_200] (rows=1 width=12) + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=36524 width=1119) + Select Operator [SEL_151] (rows=6841 width=4) Output:["_col0"] - Please refer to the previous Select 
Operator [SEL_192] + Please refer to the previous Merge Join Operator [MERGEJOIN_182] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_206] + Group By Operator [GBY_203] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_195] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=115467 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_232] (rows=115467 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=115467 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_181] (rows=23255411 width=3) + Conds:RS_231._col0=RS_200._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_195] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_229] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_63_c_c_customer_sk_min) AND DynamicValue(RS_63_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_63_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_56_date_dim_d_date_sk_min) AND DynamicValue(RS_56_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_56_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_49] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_201] (rows=150 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_195] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_228] + Group By Operator [GBY_227] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_166] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_48] diff --git a/ql/src/test/results/clientpositive/perf/tez/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/query7.q.out index 6e18bcf5ea..6cd8fb58f4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query7.q.out @@ -53,14 +53,12 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -70,86 +68,86 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_140] - Limit [LIM_139] (rows=100 width=88) + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_138] (rows=421657640 width=88) + Select Operator [SEL_128] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] - Select Operator [SEL_136] (rows=421657640 width=88) + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_135] (rows=421657640 width=88) + Group By Operator [GBY_125] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 - Group By Operator [GBY_28] (rows=843315281 width=88) + Group By Operator [GBY_28] (rows=462000 width=476) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col18 - Top N Key Operator [TNK_55] (rows=843315281 width=88) + Top N Key Operator [TNK_55] (rows=1441769 width=100) keys:_col18,sort order:+,top n:100 - Merge Join Operator [MERGEJOIN_99] (rows=843315281 width=88) - Conds:RS_24._col1=RS_126._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + Merge Join Operator [MERGEJOIN_99] (rows=1441769 width=100) + Conds:RS_24._col1=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_125] (rows=462000 width=1436) + Select Operator [SEL_123] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=462000 width=1436) + Filter Operator [FIL_122] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + TableScan [TS_12] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=766650239 width=88) - Conds:RS_21._col3=RS_118._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_98] (rows=1441769 width=4) + Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_121] PartitionCols:_col0 - Select Operator [SEL_117] (rows=2300 width=1179) + Select Operator [SEL_120] (rows=2300 width=174) Output:["_col0"] - Filter Operator [FIL_116] (rows=2300 width=1179) + Filter Operator [FIL_119] (rows=2300 width=174) 
predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + TableScan [TS_9] (rows=2300 width=174) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_97] (rows=1441769 width=4) Conds:RS_18._col0=RS_110._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_109] (rows=36524 width=1119) + Select Operator [SEL_109] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_108] (rows=36524 width=1119) + Filter Operator [FIL_108] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_6] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=633595212 width=88) - Conds:RS_134._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_96] (rows=4037893 width=4) + Conds:RS_118._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_101] (rows=232725 width=385) + Select Operator [SEL_101] (rows=14776 width=265) Output:["_col0"] - Filter Operator [FIL_100] (rows=232725 width=385) + Filter Operator [FIL_100] (rows=14776 width=268) 
predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + TableScan [TS_3] (rows=1861800 width=268) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] + SHUFFLE [RS_118] PartitionCols:_col2 - Select Operator [SEL_133] (rows=575995635 width=88) + Select Operator [SEL_117] (rows=501686735 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_132] (rows=575995635 width=88) - predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_22_promotion_p_promo_sk_min) AND DynamicValue(RS_22_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_22_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + Filter Operator [FIL_116] (rows=501686735 width=340) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_cdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=340) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_115] Group By Operator [GBY_114] (rows=1 width=12) @@ -158,31 +156,9 @@ Stage-0 SHUFFLE [RS_113] Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_111] (rows=36524 width=1119) + Select Operator [SEL_111] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_109] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_119] (rows=2300 width=1179) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_117] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_127] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_125] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_107] Group By Operator [GBY_106] (rows=1 width=12) @@ -191,7 +167,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_105] Group By Operator [GBY_104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=232725 width=385) + Select Operator [SEL_103] (rows=14776 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_101] diff --git a/ql/src/test/results/clientpositive/perf/tez/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/query70.q.out index 15cd58376a..e8743e3298 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -83,186 +83,134 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 17 <- Reducer 14 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) -Reducer 10 <- Map 17 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_187] - Limit [LIM_186] (rows=100 width=88) + File Output Operator [FS_168] + Limit [LIM_167] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_185] (rows=1149975358 width=88) + Select Operator [SEL_166] (rows=240 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] - Select Operator [SEL_183] (rows=1149975358 width=88) + SHUFFLE [RS_165] + Select Operator [SEL_164] (rows=240 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_182] (rows=1149975358 width=88) + PTF Operator [PTF_163] (rows=240 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_181] (rows=1149975358 width=88) + Select Operator [SEL_162] (rows=240 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] + SHUFFLE [RS_161] PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_179] (rows=1149975358 width=88) + Select Operator [SEL_160] (rows=240 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_178] (rows=1149975358 width=88) + Group By Operator [GBY_159] (rows=240 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=2299950717 width=88) + Group By Operator [GBY_48] (rows=18000 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_46] (rows=766650239 width=88) + Select Operator [SEL_46] (rows=29778893 width=207) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_134] (rows=766650239 width=88) - Conds:RS_43._col7=RS_177._col0(Inner),Output:["_col2","_col6","_col7"] - <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] + Merge Join Operator [MERGEJOIN_134] (rows=29778893 width=207) + Conds:RS_43._col7=RS_158._col0(Inner),Output:["_col2","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] PartitionCols:_col0 - Select Operator [SEL_176] (rows=116159124 width=88) + Select Operator [SEL_157] (rows=16 width=86) Output:["_col0"] - Filter Operator [FIL_175] (rows=116159124 width=88) + Filter Operator [FIL_156] (rows=16 width=198) predicate:(rank_window_0 <= 5) - PTF Operator 
[PTF_174] (rows=348477374 width=88) + PTF Operator [PTF_155] (rows=49 width=198) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_173] (rows=348477374 width=88) + Select Operator [SEL_154] (rows=49 width=198) Output:["_col0","_col1"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=348477374 width=88) + Group By Operator [GBY_152] (rows=49 width=198) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=696954748 width=88) + Group By Operator [GBY_25] (rows=1704 width=198) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 - Merge Join Operator [MERGEJOIN_133] (rows=696954748 width=88) - Conds:RS_21._col1=RS_162._col0(Inner),Output:["_col2","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_162] - PartitionCols:_col0 - Select Operator [SEL_161] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=1704 width=1910) - predicate:(s_state is not null and s_store_sk is not null) - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 10 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_133] (rows=91197860 width=168) + Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col6"] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_132] (rows=633595212 width=88) - Conds:RS_170._col0=RS_139._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_139] + Merge Join Operator [MERGEJOIN_130] (rows=91197860 width=85) + Conds:RS_145._col0=RS_137._col0(Inner),Output:["_col1","_col2"] + <-Map 11 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_136] (rows=73049 width=1119) + Select Operator [SEL_136] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_135] (rows=73049 width=1119) + Filter Operator [FIL_135] (rows=317 width=8) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] PartitionCols:_col0 - Select Operator [SEL_169] (rows=575995635 width=88) + Select Operator [SEL_144] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_168] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) + Filter Operator [FIL_143] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan 
[TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_142] + Group By Operator [GBY_141] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_144] - Group By Operator [GBY_142] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + Group By Operator [GBY_139] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_140] (rows=73049 width=1119) + Select Operator [SEL_138] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_136] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_167] - Group By Operator [GBY_166] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_165] - Group By Operator [GBY_164] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_163] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_161] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=1704 width=90) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=1704 width=90) + predicate:(s_state is not null and s_store_sk is not null) + TableScan [TS_15] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_43] 
PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_131] (rows=696954748 width=88) - Conds:RS_40._col1=RS_149._col0(Inner),Output:["_col2","_col6","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] - PartitionCols:_col0 - Select Operator [SEL_148] (rows=1704 width=1910) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_147] (rows=1704 width=1910) - predicate:(s_state is not null and s_store_sk is not null) - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] + Merge Join Operator [MERGEJOIN_131] (rows=91197860 width=266) + Conds:RS_40._col1=RS_148._col0(Inner),Output:["_col2","_col6","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=633595212 width=88) - Conds:RS_157._col0=RS_137._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_137] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_136] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_157] - PartitionCols:_col0 - Select Operator [SEL_156] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_155] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_154] - Group By Operator [GBY_153] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] - Group By Operator [GBY_151] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_150] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_148] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_143] - Group By Operator [GBY_141] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_138] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_136] + Please refer to the previous Merge Join Operator [MERGEJOIN_130] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=1704 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_146] (rows=1704 width=188) + predicate:(s_state is not null and s_store_sk is not null) + TableScan [TS_6] (rows=1704 width=188) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county","s_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/query71.q.out index 833b6f6bbe..e031c3aa05 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query71.q.out @@ -113,70 +113,70 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_188] - Select Operator 
[SEL_187] (rows=670816149 width=108) + Select Operator [SEL_187] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_186] - Select Operator [SEL_185] (rows=670816149 width=108) + Select Operator [SEL_185] (rows=1991967 width=227) Output:["_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_184] (rows=670816149 width=108) + Group By Operator [GBY_184] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_45] (rows=1341632299 width=108) + Group By Operator [GBY_45] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4, _col8, _col9, _col5 - Merge Join Operator [MERGEJOIN_140] (rows=1341632299 width=108) + Merge Join Operator [MERGEJOIN_140] (rows=1991967 width=112) Conds:RS_41._col2=RS_173._col0(Inner),Output:["_col0","_col4","_col5","_col8","_col9"] <-Map 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_173] PartitionCols:_col0 - Select Operator [SEL_172] (rows=86400 width=471) + Select Operator [SEL_172] (rows=43200 width=99) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_171] (rows=86400 width=471) + Filter Operator [FIL_171] (rows=43200 width=99) predicate:((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) - TableScan [TS_35] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] + TableScan [TS_35] (rows=86400 width=99) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_139] (rows=1219665700 width=108) + Merge Join Operator [MERGEJOIN_139] (rows=3983933 width=104) Conds:Union 
3._col1=RS_163._col0(Inner),Output:["_col0","_col2","_col4","_col5"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_162] (rows=231000 width=1436) + Select Operator [SEL_162] (rows=7333 width=111) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_161] (rows=231000 width=1436) + Filter Operator [FIL_161] (rows=7333 width=111) predicate:((i_manager_id = 1) and i_item_sk is not null) - TableScan [TS_32] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] + TableScan [TS_32] (rows=462000 width=111) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] <-Union 3 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] Reduce Output Operator [RS_148] PartitionCols:_col1 - Select Operator [SEL_146] (rows=316788826 width=135) + Select Operator [SEL_146] (rows=7751851 width=98) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_145] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_145] (rows=7751851 width=98) Conds:RS_199._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_190] (rows=18262 width=1119) + Select Operator [SEL_190] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_189] (rows=18262 width=1119) + Filter Operator [FIL_189] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_199] PartitionCols:_col0 - Select Operator [SEL_198] (rows=287989836 width=135) + Select Operator [SEL_198] (rows=285116947 width=123) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_197] (rows=287989836 width=135) + Filter Operator [FIL_197] (rows=285116947 width=123) predicate:((cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) and (cs_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(cs_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null) - TableScan [TS_10] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] + TableScan [TS_10] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_169] Group By Operator [GBY_167] (rows=1 width=12) @@ -185,7 +185,7 @@ Stage-0 SHUFFLE [RS_166] Group By Operator [GBY_165] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_164] (rows=231000 width=1436) + Select Operator [SEL_164] (rows=7333 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_162] <-Reducer 21 [BROADCAST_EDGE] vectorized @@ -196,7 +196,7 @@ Stage-0 SHUFFLE [RS_176] Group By Operator [GBY_175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_174] (rows=86400 width=471) + Select Operator [SEL_174] (rows=43200 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_172] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -207,34 +207,34 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_194] Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_192] (rows=18262 width=1119) + Select Operator [SEL_192] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_190] <-Reducer 15 [CONTAINS] Reduce Output Operator [RS_152] PartitionCols:_col1 - Select Operator [SEL_150] (rows=633595212 width=88) + Select Operator [SEL_150] (rows=14384397 width=4) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_149] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_149] (rows=14384397 width=4) Conds:RS_210._col0=RS_202._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_202] PartitionCols:_col0 - Select Operator [SEL_201] (rows=18262 width=1119) + Select Operator [SEL_201] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_200] (rows=18262 width=1119) + Filter Operator [FIL_200] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) - TableScan [TS_24] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_24] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_210] PartitionCols:_col0 - Select Operator [SEL_209] (rows=575995635 width=88) + Select Operator [SEL_209] (rows=525325345 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_208] (rows=575995635 width=88) + Filter Operator [FIL_208] (rows=525325345 width=118) predicate:((ss_item_sk 
BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_sold_time_sk is not null) - TableScan [TS_21] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_21] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_170] Please refer to the previous Group By Operator [GBY_167] @@ -249,34 +249,34 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_205] Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_203] (rows=18262 width=1119) + Select Operator [SEL_203] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_201] <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_144] PartitionCols:_col1 - Select Operator [SEL_142] (rows=158402938 width=135) + Select Operator [SEL_142] (rows=3941098 width=118) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_141] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_141] (rows=3941098 width=118) Conds:RS_183._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 
[SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_155] PartitionCols:_col0 - Select Operator [SEL_154] (rows=18262 width=1119) + Select Operator [SEL_154] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_153] (rows=18262 width=1119) + Filter Operator [FIL_153] (rows=50 width=12) predicate:((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_182] (rows=144002668 width=135) + Select Operator [SEL_182] (rows=143930836 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_181] (rows=144002668 width=135) + Filter Operator [FIL_181] (rows=143930836 width=123) predicate:((ws_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] + TableScan [TS_0] (rows=144002668 width=123) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_168] Please refer to the previous Group By Operator [GBY_167] @@ -291,7 +291,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_158] Group By Operator [GBY_157] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=18262 width=1119) + Select Operator [SEL_156] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_154] diff --git a/ql/src/test/results/clientpositive/perf/tez/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out index 3fe6d21909..700a8769a2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query72.q.out @@ -81,263 +81,227 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 10 <- Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 12 <- Map 19 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Map 9 <- Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) +Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 20 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Map 24 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 26 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 29 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_315] - Limit [LIM_314] (rows=100 width=135) + 
File Output Operator [FS_303] + Limit [LIM_302] (rows=100 width=312) Number of rows:100 - Select Operator [SEL_313] (rows=37725837 width=135) + Select Operator [SEL_301] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - Group By Operator [GBY_311] (rows=37725837 width=135) + SHUFFLE [RS_300] + Group By Operator [GBY_299] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_68] (rows=75451675 width=135) + Group By Operator [GBY_68] (rows=610435044 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_66] (rows=75451675 width=135) + Select Operator [SEL_66] (rows=1574305390 width=292) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_251] (rows=75451675 width=135) - Conds:RS_63._col4, _col6=RS_310._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + Merge Join Operator [MERGEJOIN_251] (rows=1574305390 width=292) + Conds:RS_63._col4, _col6=RS_298._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] PartitionCols:_col0, _col1 - Select Operator [SEL_309] (rows=28798881 width=106) + Select Operator [SEL_297] (rows=28798881 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_308] (rows=28798881 width=106) + Filter Operator [FIL_296] (rows=28798881 width=8) predicate:cr_item_sk is not null - TableScan [TS_60] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] + 
TableScan [TS_60] (rows=28798881 width=8) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col4, _col6 - Select Operator [SEL_59] (rows=68592431 width=135) + Select Operator [SEL_59] (rows=610435044 width=300) Output:["_col4","_col6","_col13","_col15","_col22","_col28"] - Merge Join Operator [MERGEJOIN_250] (rows=68592431 width=135) - Conds:RS_56._col0, _col20=RS_307._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Merge Join Operator [MERGEJOIN_250] (rows=610435044 width=300) + Conds:RS_56._col0, _col20=RS_295._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0, _col1 - Select Operator [SEL_306] (rows=73049 width=1119) + Select Operator [SEL_294] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_305] (rows=73049 width=1119) + Filter Operator [FIL_293] (rows=73049 width=8) predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_46] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] + TableScan [TS_46] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col0, _col20 - Filter Operator [FIL_55] (rows=62356755 width=135) + Filter Operator [FIL_55] (rows=545947820 width=311) predicate:(_col3 < _col17) - Merge Join Operator [MERGEJOIN_249] (rows=187070265 width=135) + Merge Join Operator [MERGEJOIN_249] (rows=1637843460 width=311) Conds:RS_52._col1=RS_53._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col20","_col26"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] 
(rows=41342400 width=15) - Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - PartitionCols:_col2 - Select Operator [SEL_253] (rows=37584000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_252] (rows=37584000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] - PartitionCols:_col0 - Select Operator [SEL_256] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_3] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col8 - Select Operator [SEL_45] (rows=170063874 width=135) + Select Operator [SEL_45] (rows=2726340 width=391) Output:["_col3","_col8","_col10","_col11","_col14","_col20"] - Filter Operator [FIL_44] (rows=170063874 width=135) + Filter Operator [FIL_44] (rows=2726340 width=391) predicate:(UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) - Merge Join Operator [MERGEJOIN_248] (rows=510191624 width=135) - Conds:RS_41._col1=RS_294._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_294] + Merge Join Operator [MERGEJOIN_248] (rows=8179022 width=391) + Conds:RS_41._col1=RS_292._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_293] (rows=73049 width=1119) + Select Operator [SEL_291] 
(rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_292] (rows=73049 width=1119) + Filter Operator [FIL_290] (rows=73049 width=98) predicate:d_date_sk is not null - TableScan [TS_23] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 15 [SIMPLE_EDGE] + TableScan [TS_23] (rows=73049 width=98) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_247] (rows=463810558 width=135) - Conds:RS_38._col4=RS_284._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] - <-Map 24 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_284] + Merge Join Operator [MERGEJOIN_247] (rows=8179022 width=300) + Conds:RS_38._col4=RS_289._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_283] (rows=462000 width=1436) + Select Operator [SEL_288] (rows=462000 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_282] (rows=462000 width=1436) + Filter Operator [FIL_287] (rows=462000 width=188) predicate:i_item_sk is not null - TableScan [TS_20] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 14 [SIMPLE_EDGE] + TableScan [TS_20] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_246] (rows=421645953 width=135) - Conds:RS_35._col5=RS_304._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] + Merge Join Operator [MERGEJOIN_246] (rows=8179022 width=116) + Conds:RS_35._col5=RS_286._col0(Left 
Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_303] (rows=2300 width=1179) + Select Operator [SEL_285] (rows=2300 width=4) Output:["_col0"] - TableScan [TS_18] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 13 [SIMPLE_EDGE] + TableScan [TS_18] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_245] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_245] (rows=8179022 width=115) Conds:RS_32._col3=RS_276._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 21 [SIMPLE_EDGE] vectorized + <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_276] PartitionCols:_col0 - Select Operator [SEL_275] (rows=3600 width=107) + Select Operator [SEL_275] (rows=1440 width=97) Output:["_col0"] - Filter Operator [FIL_274] (rows=3600 width=107) + Filter Operator [FIL_274] (rows=1440 width=96) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 12 [SIMPLE_EDGE] + TableScan [TS_15] (rows=7200 width=96) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_244] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_244] (rows=40895108 width=121) Conds:RS_29._col2=RS_268._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 19 [SIMPLE_EDGE] vectorized + <-Map 18 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE 
[RS_268] PartitionCols:_col0 - Select Operator [SEL_267] (rows=930900 width=385) + Select Operator [SEL_267] (rows=265971 width=89) Output:["_col0"] - Filter Operator [FIL_266] (rows=930900 width=385) + Filter Operator [FIL_266] (rows=265971 width=89) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_12] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_12] (rows=1861800 width=89) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_243] (rows=316788826 width=135) - Conds:RS_302._col0=RS_260._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 17 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_243] (rows=100076475 width=125) + Conds:RS_284._col0=RS_260._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 16 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_260] PartitionCols:_col0 - Select Operator [SEL_259] (rows=36524 width=1119) + Select Operator [SEL_259] (rows=652 width=106) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_258] (rows=36524 width=1119) + Filter Operator [FIL_258] (rows=652 width=106) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] + TableScan [TS_9] (rows=73049 width=106) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] PartitionCols:_col0 - Select Operator [SEL_301] 
(rows=287989836 width=135) + Select Operator [SEL_283] (rows=282274763 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_300] (rows=287989836 width=135) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_52_inventory_inv_item_sk_min) AND DynamicValue(RS_52_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_52_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 18 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_282] 
(rows=282274763 width=31) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=31) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized BROADCAST [RS_265] Group By Operator [GBY_264] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_263] Group By Operator [GBY_262] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_261] (rows=36524 width=1119) + Select Operator [SEL_261] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_259] - <-Reducer 20 [BROADCAST_EDGE] vectorized + <-Reducer 19 [BROADCAST_EDGE] vectorized BROADCAST [RS_273] Group By Operator [GBY_272] (rows=1 
width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_271] Group By Operator [GBY_270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_269] (rows=930900 width=385) + Select Operator [SEL_269] (rows=265971 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_267] - <-Reducer 22 [BROADCAST_EDGE] vectorized + <-Reducer 21 [BROADCAST_EDGE] vectorized BROADCAST [RS_281] Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_279] Group By Operator [GBY_278] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_277] (rows=3600 width=107) + Select Operator [SEL_277] (rows=1440 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_275] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_285] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_283] - <-Reducer 27 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_299] - Group By Operator [GBY_298] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] - Group By Operator [GBY_296] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_295] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_293] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_174] - Group By Operator [GBY_173] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] - Select Operator [SEL_172] (rows=41342400 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_242] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=37584000 width=111) + Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + PartitionCols:_col2 + Select Operator [SEL_253] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_252] (rows=37584000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 8 [SIMPLE_EDGE] vectorized 
+ SHUFFLE [RS_257] + PartitionCols:_col0 + Select Operator [SEL_256] (rows=27 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_255] (rows=27 width=104) + predicate:w_warehouse_sk is not null + TableScan [TS_3] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/query73.q.out index fce2733b59..7aafbcf61c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query73.q.out @@ -67,145 +67,133 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 5 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 vectorized - File Output Operator [FS_141] - Select Operator [SEL_140] (rows=88000001 
width=860) + File Output Operator [FS_136] + Select Operator [SEL_135] (rows=59862 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=88000001 width=860) + Select Operator [SEL_34] (rows=59862 width=364) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=88000001 width=860) - Conds:RS_103._col0=RS_139._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=59862 width=364) + Conds:RS_103._col0=RS_134._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=80000000 width=860) + Select Operator [SEL_102] (rows=80000000 width=356) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_101] (rows=80000000 width=860) + Filter Operator [FIL_101] (rows=80000000 width=356) predicate:c_customer_sk is not null - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + TableScan [TS_0] (rows=80000000 width=356) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_134] PartitionCols:_col1 - Filter Operator [FIL_138] (rows=19166256 width=88) + Filter Operator [FIL_133] (rows=59862 width=12) predicate:_col2 BETWEEN 1 AND 5 - Select Operator [SEL_137] (rows=383325119 width=88) + Select Operator [SEL_132] (rows=1197233 width=12) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_136] (rows=383325119 width=88) + Group By Operator [GBY_131] (rows=1197233 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_25] (rows=766650239 width=88) + Group By Operator [GBY_25] (rows=1197233 width=12) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88) - Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col1","_col4"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + Merge Join Operator [MERGEJOIN_99] (rows=1197233 width=4) + Conds:RS_21._col3=RS_122._col0(Inner),Output:["_col1","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_124] (rows=1704 width=1910) + Select Operator [SEL_121] (rows=85 width=102) Output:["_col0"] - Filter Operator [FIL_123] (rows=1704 width=1910) + Filter Operator [FIL_120] (rows=85 width=102) predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) - TableScan [TS_12] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"] - <-Reducer 7 [SIMPLE_EDGE] + TableScan [TS_12] (rows=1704 width=102) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88) - Conds:RS_18._col2=RS_117._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_98] (rows=2973700 width=4) + Conds:RS_18._col2=RS_114._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] PartitionCols:_col0 - Select Operator [SEL_116] (rows=1200 width=107) + Select Operator [SEL_113] (rows=480 width=104) Output:["_col0"] - Filter Operator [FIL_115] (rows=1200 
width=107) + Filter Operator [FIL_112] (rows=480 width=104) predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] - <-Reducer 6 [SIMPLE_EDGE] + TableScan [TS_9] (rows=7200 width=104) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88) - Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + Merge Join Operator [MERGEJOIN_97] (rows=44605486 width=10) + Conds:RS_130._col0=RS_106._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_108] (rows=73049 width=1119) + Select Operator [SEL_105] (rows=170 width=12) Output:["_col0"] - Filter Operator [FIL_107] (rows=73049 width=1119) + Filter Operator [FIL_104] (rows=170 width=12) predicate:((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_134] (rows=575995635 width=88) + Select Operator [SEL_129] 
(rows=479121995 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_133] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_31_customer_c_customer_sk_min) AND DynamicValue(RS_31_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_31_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) + Filter Operator [FIL_128] (rows=479121995 width=19) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN 
DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=19) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=73049 width=1119) + Select Operator [SEL_107] (rows=170 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_108] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_105] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_119] + Group By Operator [GBY_118] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_118] (rows=1200 width=107) + Select Operator [SEL_115] (rows=480 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_116] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_113] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_126] (rows=1704 width=1910) + Select Operator [SEL_123] (rows=85 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_124] - <-Reducer 4 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_104] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Please refer to the previous Select Operator [SEL_121] diff --git a/ql/src/test/results/clientpositive/perf/tez/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/query74.q.out index a3e7846465..4b174fe4d4 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query74.q.out @@ -131,283 +131,271 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 21 (BROADCAST_EDGE) -Map 11 <- Reducer 23 (BROADCAST_EDGE) -Map 15 <- Reducer 20 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE) -Map 7 <- Reducer 22 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 17 <- Map 24 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 24 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 23 (BROADCAST_EDGE) +Map 13 <- Reducer 25 (BROADCAST_EDGE) +Map 17 <- Reducer 22 (BROADCAST_EDGE) +Map 9 <- Reducer 24 (BROADCAST_EDGE) +Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 21 
(CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 19 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_356] - Limit [LIM_355] (rows=100 width=88) + Reducer 8 vectorized + File Output Operator [FS_354] + Limit [LIM_353] (rows=100 width=280) Number of rows:100 - Select Operator [SEL_354] (rows=574987679 width=88) + Select Operator [SEL_352] (rows=12248093 width=280) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=574987679 width=88) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_93] + Select Operator [SEL_92] (rows=12248093 width=280) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_87] (rows=574987679 width=88) + Filter Operator [FIL_91] (rows=12248093 width=728) predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_279] (rows=1149975359 width=88) - Conds:RS_321._col0=RS_333._col0(Inner),RS_333._col0=RS_343._col0(Inner),RS_333._col0=RS_353._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] + Merge Join Operator [MERGEJOIN_287] (rows=24496186 width=728) 
+ Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col9"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_332] (rows=116159124 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_331] (rows=116159124 width=88) - predicate:(_col4 > 0) - Select Operator [SEL_330] (rows=348477374 width=88) - Output:["_col0","_col4"] - Group By Operator [GBY_329] (rows=348477374 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_36] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_274] (rows=696954748 width=88) - Conds:RS_32._col1=RS_312._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - PartitionCols:_col0 - Select Operator [SEL_308] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_307] (rows=80000000 width=860) - predicate:(c_customer_id is not null and c_customer_sk is not null) - TableScan [TS_68] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_273] (rows=633595212 width=88) - Conds:RS_328._col0=RS_290._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - PartitionCols:_col0 - Select Operator [SEL_284] (rows=36524 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=36524 width=1119) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) - TableScan 
[TS_65] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] - PartitionCols:_col0 - Select Operator [SEL_327] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_326] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_33_customer_c_customer_sk_min) AND DynamicValue(RS_33_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_33_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_20] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_323] - Group By Operator [GBY_322] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - Group By Operator [GBY_296] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_284] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_325] - Group By Operator [GBY_324] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] - Group By Operator [GBY_316] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_313] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] - PartitionCols:_col0 - Select Operator [SEL_342] (rows=29040539 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_341] (rows=29040539 width=135) - predicate:(_col4 > 0) - Select Operator [SEL_340] (rows=87121617 width=135) - Output:["_col0","_col4"] - Group By Operator [GBY_339] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_57] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_276] (rows=174243235 width=135) - Conds:RS_53._col1=RS_314._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_308] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=158402938 width=135) - Conds:RS_338._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] - PartitionCols:_col0 - Select Operator [SEL_285] (rows=36524 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_282] (rows=36524 width=1119) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] - PartitionCols:_col0 - Select Operator [SEL_337] (rows=144002668 width=135) - 
Output:["_col0","_col1","_col2"] - Filter Operator [FIL_336] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_41] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_335] - Group By Operator [GBY_334] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Group By Operator [GBY_297] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_285] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] - PartitionCols:_col0 - Select Operator [SEL_352] (rows=348477374 width=88) + Select Operator [SEL_350] (rows=80000000 width=392) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_351] (rows=348477374 width=88) + Group By Operator [GBY_349] (rows=80000000 width=396) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_78] (rows=696954748 width=88) + Group By Operator [GBY_78] (rows=80000000 width=396) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_278] 
(rows=696954748 width=88) - Conds:RS_74._col1=RS_309._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] + Merge Join Operator [MERGEJOIN_284] (rows=187573258 width=381) + Conds:RS_74._col1=RS_317._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_317] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_308] - <-Reducer 16 [SIMPLE_EDGE] + Select Operator [SEL_316] (rows=80000000 width=284) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_315] (rows=80000000 width=284) + predicate:(c_customer_id is not null and c_customer_sk is not null) + TableScan [TS_68] (rows=80000000 width=284) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_74] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_277] (rows=633595212 width=88) - Conds:RS_350._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + Merge Join Operator [MERGEJOIN_283] (rows=187573258 width=105) + Conds:RS_348._col0=RS_294._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_283] (rows=36524 width=1119) + Select Operator [SEL_291] (rows=652 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_280] (rows=36524 width=1119) + Filter Operator [FIL_288] (rows=652 width=8) predicate:((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] + TableScan [TS_65] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_348] PartitionCols:_col0 - Select Operator 
[SEL_349] (rows=575995635 width=88) + Select Operator [SEL_347] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_348] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_75_customer_c_customer_sk_min) AND DynamicValue(RS_75_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_75_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_62] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 20 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_346] (rows=525327388 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_62] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 22 [BROADCAST_EDGE] vectorized BROADCAST [RS_345] Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_302] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_287] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_283] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_347] - Group By Operator [GBY_346] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] - Group By Operator [GBY_315] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_310] (rows=80000000 width=860) + Select Operator [SEL_295] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] - PartitionCols:_col0 - Select Operator [SEL_320] (rows=87121617 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_319] (rows=87121617 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_16] (rows=174243235 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 - Merge Join Operator [MERGEJOIN_272] (rows=174243235 width=135) - Conds:RS_12._col1=RS_311._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_308] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_271] (rows=158402938 width=135) - Conds:RS_306._col0=RS_288._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] - PartitionCols:_col0 - Please refer to the previous Select 
Operator [SEL_283] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_305] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_304] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_303] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] - Group By Operator [GBY_295] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_289] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_283] + Please refer to the previous Select Operator [SEL_291] + <-Reducer 6 [ONE_TO_ONE_EDGE] + FORWARD [RS_88] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_286] (rows=20485011 width=436) + Conds:RS_85._col2=RS_343._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] + PartitionCols:_col0 + Select Operator [SEL_342] (rows=17130654 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_341] (rows=17130654 width=212) + predicate:(_col4 > 0) + Select Operator [SEL_340] (rows=51391963 width=212) + Output:["_col0","_col4"] + Group By Operator [GBY_339] (rows=51391963 width=396) + 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_57] (rows=51391963 width=396) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 + Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=395) + Conds:RS_53._col1=RS_320._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_316] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_281] (rows=51391963 width=119) + Conds:RS_338._col0=RS_300._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_300] + PartitionCols:_col0 + Select Operator [SEL_293] (rows=652 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_290] (rows=652 width=8) + predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) + Please refer to the previous TableScan [TS_65] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] + PartitionCols:_col0 + Select Operator [SEL_337] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_336] (rows=143930993 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_41] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_335] + Group By Operator [GBY_334] 
(rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_309] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_85] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_285] (rows=31888273 width=324) + Conds:RS_323._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0 + Select Operator [SEL_332] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_331] (rows=26666666 width=212) + predicate:(_col4 > 0) + Select Operator [SEL_330] (rows=80000000 width=212) + Output:["_col0","_col4"] + Group By Operator [GBY_329] (rows=80000000 width=396) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_36] (rows=80000000 width=396) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 + Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=381) + Conds:RS_32._col1=RS_319._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_316] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_279] (rows=187573258 width=105) + 
Conds:RS_328._col0=RS_298._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] + PartitionCols:_col0 + Select Operator [SEL_292] (rows=652 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_289] (rows=652 width=8) + predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) + Please refer to the previous TableScan [TS_65] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] + PartitionCols:_col0 + Select Operator [SEL_327] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_326] (rows=525327388 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_20] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] + Group By Operator [GBY_304] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_299] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_292] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Select Operator [SEL_322] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_321] (rows=51391963 width=396) + 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_16] (rows=51391963 width=396) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["max(_col2)"],keys:_col6, _col7, _col8, _col4 + Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=395) + Conds:RS_12._col1=RS_318._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_316] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=119) + Conds:RS_314._col0=RS_296._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_296] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_291] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] + PartitionCols:_col0 + Select Operator [SEL_313] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_312] (rows=143930993 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_311] + Group By Operator [GBY_310] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_307] + Group By Operator [GBY_303] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_297] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_291] diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out index 2ca1fb22ca..9968adef41 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out @@ -206,91 +206,91 @@ Stage-0 Stage-1 Reducer 10 vectorized File Output Operator [FS_638] - Select Operator [SEL_637] (rows=100 width=111) + Select Operator [SEL_637] (rows=100 width=160) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Limit [LIM_636] (rows=100 width=111) + Limit [LIM_636] (rows=100 width=152) Number of rows:100 - Select Operator [SEL_635] (rows=70276244 width=111) + Select Operator [SEL_635] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_175] - Select Operator [SEL_174] (rows=70276244 width=111) + Select Operator [SEL_174] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_173] (rows=70276244 width=111) + Filter Operator [FIL_173] (rows=3422897230256 width=255) predicate:((CAST( _col10 AS decimal(17,2)) / CAST( _col4 AS decimal(17,2))) < 0.9) - Merge Join Operator [MERGEJOIN_518] (rows=210828734 width=111) + Merge Join Operator [MERGEJOIN_518] (rows=10268691690770 width=255) Conds:RS_631._col0, _col1, _col2, _col3=RS_634._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col10","_col11"] <-Reducer 27 [SIMPLE_EDGE] vectorized SHUFFLE [RS_634] PartitionCols:_col0, _col1, _col2, _col3 - 
Group By Operator [GBY_633] (rows=191662482 width=111) + Group By Operator [GBY_633] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_632] (rows=383324964 width=111) + Group By Operator [GBY_632] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 26 [SIMPLE_EDGE] <-Reducer 25 [CONTAINS] vectorized Reduce Output Operator [RS_670] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_669] (rows=766649929 width=111) + Group By Operator [GBY_669] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_668] (rows=574982367 width=103) + Group By Operator [GBY_668] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 24 [SIMPLE_EDGE] <-Reducer 23 [CONTAINS] Reduce Output Operator [RS_542] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_541] (rows=1149964734 width=103) + Group By Operator [GBY_541] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_539] (rows=383314495 width=135) + Select Operator [SEL_539] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_538] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_538] (rows=170474971 width=234) Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 44 [SIMPLE_EDGE] vectorized SHUFFLE [RS_625] PartitionCols:_col0, _col1 - Select Operator [SEL_623] (rows=28798881 width=106) + Select 
Operator [SEL_623] (rows=28798881 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_622] (rows=28798881 width=106) + Filter Operator [FIL_622] (rows=28798881 width=121) predicate:cr_item_sk is not null - TableScan [TS_9] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + TableScan [TS_9] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_510] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_510] (rows=96821196 width=138) Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_599] PartitionCols:_col0 - Select Operator [SEL_592] (rows=231000 width=1436) + Select Operator [SEL_592] (rows=45745 width=109) Output:["_col0","_col1","_col2","_col3","_col5"] - Filter Operator [FIL_591] (rows=231000 width=1436) + Filter Operator [FIL_591] (rows=45745 width=109) predicate:((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id","i_category","i_manufact_id"] + TableScan [TS_6] (rows=462000 width=109) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id","i_category","i_manufact_id"] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_509] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_509] (rows=101592102 width=122) 
Conds:RS_667._col0=RS_571._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_571] PartitionCols:_col0 - Select Operator [SEL_562] (rows=36524 width=1119) + Select Operator [SEL_562] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_558] (rows=36524 width=1119) + Filter Operator [FIL_558] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 49 [SIMPLE_EDGE] vectorized SHUFFLE [RS_667] PartitionCols:_col0 - Select Operator [SEL_666] (rows=287989836 width=135) + Select Operator [SEL_666] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_665] (rows=287989836 width=135) + Filter Operator [FIL_665] (rows=286549727 width=127) predicate:((cs_item_sk BETWEEN DynamicValue(RS_101_item_i_item_sk_min) AND DynamicValue(RS_101_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_101_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_98_date_dim_d_date_sk_min) AND DynamicValue(RS_98_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_98_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_85] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + TableScan [TS_85] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] <-Reducer 28 [BROADCAST_EDGE] vectorized BROADCAST [RS_662] Group By Operator [GBY_661] (rows=1 width=12) 
@@ -299,7 +299,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_586] Group By Operator [GBY_580] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_572] (rows=36524 width=1119) + Select Operator [SEL_572] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_562] <-Reducer 41 [BROADCAST_EDGE] vectorized @@ -310,31 +310,31 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_614] Group By Operator [GBY_608] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_600] (rows=231000 width=1436) + Select Operator [SEL_600] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] <-Reducer 31 [CONTAINS] Reduce Output Operator [RS_551] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_550] (rows=1149964734 width=103) + Group By Operator [GBY_550] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_548] (rows=766650239 width=88) + Select Operator [SEL_548] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_547] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_547] (rows=450703984 width=204) Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 46 [SIMPLE_EDGE] vectorized SHUFFLE [RS_649] PartitionCols:_col0, _col1 - Select Operator [SEL_647] (rows=57591150 width=77) + Select Operator [SEL_647] (rows=57591150 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_646] (rows=57591150 width=77) + Filter Operator [FIL_646] (rows=57591150 width=119) predicate:sr_item_sk is not null - TableScan [TS_31] 
(rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + TableScan [TS_31] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_125] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_513] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_513] (rows=187186493 width=124) Conds:RS_122._col1=RS_601._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_601] @@ -343,25 +343,25 @@ Stage-0 <-Reducer 29 [SIMPLE_EDGE] SHUFFLE [RS_122] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_512] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_512] (rows=196410188 width=109) Conds:RS_677._col0=RS_573._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_573] PartitionCols:_col0 - Select Operator [SEL_563] (rows=36524 width=1119) + Select Operator [SEL_563] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_559] (rows=36524 width=1119) + Filter Operator [FIL_559] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 50 [SIMPLE_EDGE] vectorized SHUFFLE [RS_677] PartitionCols:_col0 - Select Operator [SEL_676] (rows=575995635 width=88) + Select Operator [SEL_676] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_675] (rows=575995635 width=88) + Filter Operator [FIL_675] (rows=550076554 width=122) predicate:((ss_item_sk BETWEEN DynamicValue(RS_123_item_i_item_sk_min) AND DynamicValue(RS_123_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_123_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN 
DynamicValue(RS_120_date_dim_d_date_sk_min) AND DynamicValue(RS_120_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_120_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_107] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + TableScan [TS_107] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] <-Reducer 32 [BROADCAST_EDGE] vectorized BROADCAST [RS_672] Group By Operator [GBY_671] (rows=1 width=12) @@ -370,7 +370,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_587] Group By Operator [GBY_581] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_574] (rows=36524 width=1119) + Select Operator [SEL_574] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_563] <-Reducer 42 [BROADCAST_EDGE] vectorized @@ -381,31 +381,31 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_615] Group By Operator [GBY_609] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_602] (rows=231000 width=1436) + Select Operator [SEL_602] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] <-Reducer 35 [CONTAINS] Reduce Output Operator [RS_556] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_555] (rows=766649929 width=111) + Group By Operator [GBY_555] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_553] (rows=191667562 width=135) + Select Operator [SEL_553] 
(rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_552] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_552] (rows=115177968 width=220) Conds:RS_154._col1, _col2=RS_660._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_660] PartitionCols:_col0, _col1 - Select Operator [SEL_658] (rows=14398467 width=92) + Select Operator [SEL_658] (rows=14398467 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_657] (rows=14398467 width=92) + Filter Operator [FIL_657] (rows=14398467 width=118) predicate:wr_item_sk is not null - TableScan [TS_60] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] + TableScan [TS_60] (rows=14398467 width=118) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_154] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_516] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_516] (rows=48990732 width=139) Conds:RS_151._col1=RS_603._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_603] @@ -414,25 +414,25 @@ Stage-0 <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_151] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_515] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_515] (rows=51404771 width=123) Conds:RS_684._col0=RS_575._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_575] PartitionCols:_col0 - Select Operator [SEL_564] (rows=36524 width=1119) + Select Operator [SEL_564] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_560] 
(rows=36524 width=1119) + Filter Operator [FIL_560] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 51 [SIMPLE_EDGE] vectorized SHUFFLE [RS_684] PartitionCols:_col0 - Select Operator [SEL_683] (rows=144002668 width=135) + Select Operator [SEL_683] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_682] (rows=144002668 width=135) + Filter Operator [FIL_682] (rows=143966864 width=127) predicate:((ws_item_sk BETWEEN DynamicValue(RS_152_item_i_item_sk_min) AND DynamicValue(RS_152_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_152_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_149_date_dim_d_date_sk_min) AND DynamicValue(RS_149_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_149_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_136] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] + TableScan [TS_136] (rows=144002668 width=127) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] <-Reducer 36 [BROADCAST_EDGE] vectorized BROADCAST [RS_679] Group By Operator [GBY_678] (rows=1 width=12) @@ -441,7 +441,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_588] Group By Operator [GBY_582] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_576] (rows=36524 width=1119) + Select Operator [SEL_576] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_564] <-Reducer 43 [BROADCAST_EDGE] vectorized @@ -452,25 +452,25 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_616] Group By Operator 
[GBY_610] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_604] (rows=231000 width=1436) + Select Operator [SEL_604] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_631] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_630] (rows=191662482 width=111) + Group By Operator [GBY_630] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_629] (rows=383324964 width=111) + Group By Operator [GBY_629] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 7 [SIMPLE_EDGE] <-Reducer 19 [CONTAINS] Reduce Output Operator [RS_537] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_536] (rows=766649929 width=111) + Group By Operator [GBY_536] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_534] (rows=191667562 width=135) + Select Operator [SEL_534] (rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_533] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_533] (rows=115177968 width=220) Conds:RS_69._col1, _col2=RS_659._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_659] @@ -479,7 +479,7 @@ Stage-0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_507] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_507] (rows=48990732 width=139) 
Conds:RS_66._col1=RS_597._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_597] @@ -488,25 +488,25 @@ Stage-0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_506] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_506] (rows=51404771 width=123) Conds:RS_656._col0=RS_569._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_569] PartitionCols:_col0 - Select Operator [SEL_561] (rows=36524 width=1119) + Select Operator [SEL_561] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_557] (rows=36524 width=1119) + Filter Operator [FIL_557] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 47 [SIMPLE_EDGE] vectorized SHUFFLE [RS_656] PartitionCols:_col0 - Select Operator [SEL_655] (rows=144002668 width=135) + Select Operator [SEL_655] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_654] (rows=144002668 width=135) + Filter Operator [FIL_654] (rows=143966864 width=127) predicate:((ws_item_sk BETWEEN DynamicValue(RS_67_item_i_item_sk_min) AND DynamicValue(RS_67_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_67_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_64_date_dim_d_date_sk_min) AND DynamicValue(RS_64_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_64_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_51] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] + TableScan [TS_51] (rows=144002668 width=127) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] <-Reducer 20 [BROADCAST_EDGE] vectorized BROADCAST [RS_651] Group By Operator [GBY_650] (rows=1 width=12) @@ -515,7 +515,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_585] Group By Operator [GBY_579] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_570] (rows=36524 width=1119) + Select Operator [SEL_570] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_561] <-Reducer 40 [BROADCAST_EDGE] vectorized @@ -526,25 +526,25 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_613] Group By Operator [GBY_607] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_598] (rows=231000 width=1436) + Select Operator [SEL_598] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] <-Reducer 6 [CONTAINS] vectorized Reduce Output Operator [RS_628] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_627] (rows=766649929 width=111) + Group By Operator [GBY_627] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_626] (rows=574982367 width=103) + Group By Operator [GBY_626] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 5 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] Reduce Output Operator [RS_532] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_531] (rows=1149964734 width=103) + Group By Operator [GBY_531] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, 
_col4, _col5 - Select Operator [SEL_529] (rows=766650239 width=88) + Select Operator [SEL_529] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_528] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_528] (rows=450703984 width=204) Conds:RS_40._col1, _col2=RS_648._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 46 [SIMPLE_EDGE] vectorized SHUFFLE [RS_648] @@ -553,7 +553,7 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_504] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_504] (rows=187186493 width=124) Conds:RS_37._col1=RS_595._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_595] @@ -562,7 +562,7 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_503] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_503] (rows=196410188 width=109) Conds:RS_645._col0=RS_567._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_567] @@ -571,12 +571,12 @@ Stage-0 <-Map 45 [SIMPLE_EDGE] vectorized SHUFFLE [RS_645] PartitionCols:_col0 - Select Operator [SEL_644] (rows=575995635 width=88) + Select Operator [SEL_644] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_643] (rows=575995635 width=88) + Filter Operator [FIL_643] (rows=550076554 width=122) predicate:((ss_item_sk BETWEEN DynamicValue(RS_38_item_i_item_sk_min) AND DynamicValue(RS_38_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_38_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_22] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + TableScan [TS_22] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_640] Group By Operator [GBY_639] (rows=1 width=12) @@ -585,7 +585,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_584] Group By Operator [GBY_578] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_568] (rows=36524 width=1119) + Select Operator [SEL_568] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_561] <-Reducer 39 [BROADCAST_EDGE] vectorized @@ -596,17 +596,17 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_612] Group By Operator [GBY_606] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_596] (rows=231000 width=1436) + Select Operator [SEL_596] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] <-Reducer 4 [CONTAINS] Reduce Output Operator [RS_523] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_522] (rows=1149964734 width=103) + Group By Operator [GBY_522] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_520] (rows=383314495 width=135) + Select Operator [SEL_520] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_519] 
(rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_519] (rows=170474971 width=234) Conds:RS_18._col1, _col2=RS_624._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] <-Map 44 [SIMPLE_EDGE] vectorized SHUFFLE [RS_624] @@ -615,7 +615,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_501] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_501] (rows=96821196 width=138) Conds:RS_15._col1=RS_593._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_593] @@ -624,7 +624,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_500] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_500] (rows=101592102 width=122) Conds:RS_621._col0=RS_565._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_565] @@ -633,12 +633,12 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_621] PartitionCols:_col0 - Select Operator [SEL_620] (rows=287989836 width=135) + Select Operator [SEL_620] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_619] (rows=287989836 width=135) + Filter Operator [FIL_619] (rows=286549727 width=127) predicate:((cs_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + TableScan [TS_0] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] <-Reducer 12 [BROADCAST_EDGE] vectorized BROADCAST [RS_590] Group By Operator [GBY_589] (rows=1 width=12) @@ -647,7 +647,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_583] Group By Operator [GBY_577] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_566] (rows=36524 width=1119) + Select Operator [SEL_566] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_561] <-Reducer 38 [BROADCAST_EDGE] vectorized @@ -658,7 +658,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_611] Group By Operator [GBY_605] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_594] (rows=231000 width=1436) + Select Operator [SEL_594] (rows=45745 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_592] diff --git a/ql/src/test/results/clientpositive/perf/tez/query76.q.out b/ql/src/test/results/clientpositive/perf/tez/query76.q.out index 2b9504c748..add01414bb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query76.q.out @@ -59,222 +59,147 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 14 <- Reducer 16 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Map 17 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) -Map 20 <- Reducer 13 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) -Reducer 10 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 12 <- Map 21 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 13 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 10 <- Map 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 9 <- Map 18 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_199] - Limit [LIM_198] (rows=100 width=108) + File Output Operator [FS_183] + Limit [LIM_182] (rows=100 width=408) Number of rows:100 - Select Operator [SEL_197] (rows=304916424 width=108) + Select Operator [SEL_181] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] - Group By Operator [GBY_195] (rows=304916424 width=108) + SHUFFLE [RS_180] + Group By Operator [GBY_179] (rows=5600 width=408) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 4 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] + <-Reducer 10 [CONTAINS] Reduce Output Operator [RS_167] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_166] (rows=609832848 width=108) + Group By Operator [GBY_166] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_165] (rows=609832848 width=108) + Top N Key Operator [TNK_165] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top n:100 - Select Operator [SEL_163] (rows=174233858 width=135) + Select Operator [SEL_163] (rows=1433911 width=399) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_162] (rows=174233858 width=135) - Conds:RS_45._col0=RS_217._col0(Inner),Output:["_col3","_col5","_col7","_col8"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_217] + Merge Join Operator [MERGEJOIN_162] (rows=1433911 width=209) + Conds:RS_45._col0=RS_195._col0(Inner),Output:["_col3","_col5","_col7","_col8"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_216] (rows=73049 width=1119) + Select Operator [SEL_194] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_215] (rows=73049 width=1119) + Filter Operator [FIL_193] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_39] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Reducer 11 [SIMPLE_EDGE] + TableScan [TS_39] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - 
Merge Join Operator [MERGEJOIN_148] (rows=158394413 width=135) - Conds:RS_225._col2=RS_174._col0(Inner),Output:["_col0","_col3","_col5"] + Merge Join Operator [MERGEJOIN_148] (rows=1433911 width=205) + Conds:RS_192._col2=RS_172._col0(Inner),Output:["_col0","_col3","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_174] + SHUFFLE [RS_172] PartitionCols:_col0 - Select Operator [SEL_169] (rows=462000 width=1436) + Select Operator [SEL_169] (rows=462000 width=94) Output:["_col0","_col1"] - Filter Operator [FIL_168] (rows=462000 width=1436) + Filter Operator [FIL_168] (rows=462000 width=94) predicate:i_item_sk is not null - TableScan [TS_0] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_category"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + TableScan [TS_0] (rows=462000 width=94) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_192] PartitionCols:_col2 - Select Operator [SEL_224] (rows=143994918 width=135) + Select Operator [SEL_191] (rows=1433911 width=119) Output:["_col0","_col2","_col3"] - Filter Operator [FIL_223] (rows=143994918 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_43_item_i_item_sk_min) AND DynamicValue(RS_43_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_43_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_46_date_dim_d_date_sk_min) AND DynamicValue(RS_46_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_46_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is null) - TableScan [TS_33] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_181] - Group By Operator [GBY_178] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_175] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_169] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_222] - Group By Operator [GBY_221] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - Group By Operator [GBY_219] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_218] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Filter Operator [FIL_190] (rows=1433911 width=123) + predicate:(cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is null) + TableScan [TS_33] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 3 [CONTAINS] Reduce Output Operator [RS_155] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_154] (rows=609832848 width=108) + Group By Operator [GBY_154] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_153] (rows=609832848 width=108) + Top N Key Operator [TNK_153] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top 
n:100 - Select Operator [SEL_151] (rows=348477373 width=88) + Select Operator [SEL_151] (rows=24749363 width=387) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_150] (rows=348477373 width=88) - Conds:RS_12._col2=RS_186._col0(Inner),Output:["_col1","_col5","_col7","_col8"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + Merge Join Operator [MERGEJOIN_150] (rows=24749363 width=204) + Conds:RS_12._col2=RS_178._col0(Inner),Output:["_col1","_col5","_col7","_col8"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_178] PartitionCols:_col0 - Select Operator [SEL_185] (rows=73049 width=1119) + Select Operator [SEL_177] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_184] (rows=73049 width=1119) + Filter Operator [FIL_176] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_144] (rows=316797605 width=88) - Conds:RS_170._col0=RS_194._col1(Inner),Output:["_col1","_col2","_col5"] + Merge Join Operator [MERGEJOIN_144] (rows=24749363 width=200) + Conds:RS_170._col0=RS_175._col1(Inner),Output:["_col1","_col2","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_170] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_169] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_175] PartitionCols:_col1 - Select Operator [SEL_193] (rows=287997817 width=88) + Select Operator [SEL_174] (rows=24749363 width=114) Output:["_col0","_col1","_col3"] - Filter Operator [FIL_192] (rows=287997817 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_9_item_i_item_sk_min) AND 
DynamicValue(RS_9_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_9_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_191] - Group By Operator [GBY_190] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] - Group By Operator [GBY_188] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_187] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_185] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_179] - Group By Operator [GBY_176] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_171] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_169] - <-Reducer 9 [CONTAINS] + Filter Operator [FIL_173] (rows=24749363 width=118) + predicate:(ss_addr_sk is null and ss_item_sk is not null and ss_sold_date_sk is not null) + TableScan 
[TS_3] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 8 [CONTAINS] Reduce Output Operator [RS_161] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_160] (rows=609832848 width=108) + Group By Operator [GBY_160] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_159] (rows=609832848 width=108) + Top N Key Operator [TNK_159] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,sort order:+++++,top n:100 - Select Operator [SEL_157] (rows=87121617 width=135) + Select Operator [SEL_157] (rows=35728 width=394) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_156] (rows=87121617 width=135) - Conds:RS_28._col0=RS_204._col0(Inner),Output:["_col3","_col5","_col7","_col8"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] + Merge Join Operator [MERGEJOIN_156] (rows=35728 width=209) + Conds:RS_28._col0=RS_189._col0(Inner),Output:["_col3","_col5","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_203] (rows=73049 width=1119) + Select Operator [SEL_188] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_202] (rows=73049 width=1119) + Filter Operator [FIL_187] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_22] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Reducer 8 [SIMPLE_EDGE] + TableScan [TS_22] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_146] (rows=79201469 width=135) - 
Conds:RS_212._col1=RS_172._col0(Inner),Output:["_col0","_col3","_col5"] + Merge Join Operator [MERGEJOIN_146] (rows=35728 width=205) + Conds:RS_186._col1=RS_171._col0(Inner),Output:["_col0","_col3","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] + SHUFFLE [RS_171] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_169] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] PartitionCols:_col1 - Select Operator [SEL_211] (rows=72001334 width=135) + Select Operator [SEL_185] (rows=35728 width=119) Output:["_col0","_col1","_col3"] - Filter Operator [FIL_210] (rows=72001334 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_26_item_i_item_sk_min) AND DynamicValue(RS_26_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_26_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null and ws_web_page_sk is null) - TableScan [TS_16] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_201] - Group By Operator [GBY_200] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] - Group By Operator [GBY_177] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_173] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_169] - <-Reducer 19 [BROADCAST_EDGE] 
vectorized - BROADCAST [RS_209] - Group By Operator [GBY_208] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] - Group By Operator [GBY_206] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_205] (rows=73049 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_203] + Filter Operator [FIL_184] (rows=35728 width=123) + predicate:(ws_item_sk is not null and ws_sold_date_sk is not null and ws_web_page_sk is null) + TableScan [TS_16] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/query77.q.out index d5671a1ec1..ac1b0e4bad 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query77.q.out @@ -234,31 +234,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE) -Map 31 <- Reducer 17 (BROADCAST_EDGE) -Map 33 <- Reducer 24 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 30 <- Reducer 17 (BROADCAST_EDGE) +Map 32 <- Reducer 24 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 30 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 29 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 12 <- Map 28 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 31 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 14 <- Map 30 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 17 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 32 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 18 <- Map 31 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 33 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 21 <- Map 34 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 20 <- Map 32 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 21 <- Map 33 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 27 (ONE_TO_ONE_EDGE), Union 6 (CONTAINS) Reducer 24 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 36 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 26 <- Map 34 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 25 <- Map 34 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 26 <- Map 33 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) Reducer 27 <- Reducer 26 (SIMPLE_EDGE) -Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 28 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 13 (ONE_TO_ONE_EDGE), 
Reducer 4 (ONE_TO_ONE_EDGE), Union 6 (CONTAINS) Reducer 7 <- Union 6 (SIMPLE_EDGE) @@ -269,282 +267,271 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_372] - Limit [LIM_371] (rows=100 width=163) + File Output Operator [FS_367] + Limit [LIM_366] (rows=23 width=439) Number of rows:100 - Select Operator [SEL_370] (rows=956329968 width=163) + Select Operator [SEL_365] (rows=23 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_369] - Select Operator [SEL_368] (rows=956329968 width=163) + SHUFFLE [RS_364] + Select Operator [SEL_363] (rows=23 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_367] (rows=956329968 width=163) + Group By Operator [GBY_362] (rows=23 width=447) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] Reduce Output Operator [RS_322] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_321] (rows=1912659936 width=163) + Group By Operator [GBY_321] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_320] (rows=637553312 width=163) + Top N Key Operator [TNK_320] (rows=35 width=435) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_318] (rows=158394413 width=360) + Select Operator [SEL_318] (rows=2 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_317] (rows=158394413 width=360) + Merge Join Operator [MERGEJOIN_317] (rows=2 width=452) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_379] - Group By Operator [GBY_378] (rows=158394413 width=135) + PARTITION_ONLY_SHUFFLE [RS_374] + Group By Operator 
[GBY_373] (rows=2 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 - Group By Operator [GBY_54] (rows=316788826 width=135) + Group By Operator [GBY_54] (rows=56 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=316788826 width=135) - Conds:RS_377._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_302] (rows=31836679 width=222) + Conds:RS_372._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_334] PartitionCols:_col0 - Select Operator [SEL_330] (rows=8116 width=1119) + Select Operator [SEL_330] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_329] (rows=8116 width=1119) + Filter Operator [FIL_329] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_377] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_372] PartitionCols:_col0 - Select Operator [SEL_376] (rows=287989836 width=135) + Select Operator [SEL_371] (rows=286549727 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_375] (rows=287989836 width=135) + Filter Operator [FIL_370] (rows=286549727 width=231) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_44] 
(rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] + TableScan [TS_44] (rows=287989836 width=231) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_374] - Group By Operator [GBY_373] (rows=1 width=12) + BROADCAST [RS_369] + Group By Operator [GBY_368] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_344] Group By Operator [GBY_341] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_335] (rows=8116 width=1119) + Select Operator [SEL_335] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_330] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_384] - Group By Operator [GBY_383] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_379] + Group By Operator [GBY_378] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] - Merge Join Operator [MERGEJOIN_303] (rows=31678769 width=106) - Conds:RS_382._col0=RS_336._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_303] (rows=3199657 width=183) + Conds:RS_377._col0=RS_336._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_336] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_330] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_382] + <-Map 31 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_377] PartitionCols:_col0 - Select Operator [SEL_381] (rows=28798881 width=106) + Select Operator [SEL_376] (rows=28798881 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_380] (rows=28798881 width=106) + Filter Operator [FIL_375] (rows=28798881 width=223) predicate:cr_returned_date_sk is not null - TableScan [TS_58] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] + TableScan [TS_58] (rows=28798881 width=223) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] <-Reducer 23 [CONTAINS] Reduce Output Operator [RS_328] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_327] (rows=1912659936 width=163) + Group By Operator [GBY_327] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_326] (rows=637553312 width=163) + Top N Key Operator [TNK_326] (rows=35 width=435) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_324] (rows=95833780 width=135) + Select Operator [SEL_324] (rows=23 width=435) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_323] (rows=95833780 width=135) - Conds:RS_400._col0=RS_405._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_323] (rows=23 width=452) + Conds:RS_390._col0=RS_395._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_400] + FORWARD [RS_390] PartitionCols:_col0 - Group By Operator [GBY_399] (rows=87121617 width=135) + Group By Operator [GBY_389] (rows=23 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE 
[RS_94] PartitionCols:_col0 - Group By Operator [GBY_93] (rows=174243235 width=135) + Group By Operator [GBY_93] (rows=345 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_305] (rows=174243235 width=135) - Conds:RS_89._col1=RS_389._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_389] + Merge Join Operator [MERGEJOIN_305] (rows=15991254 width=227) + Conds:RS_89._col1=RS_387._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_387] PartitionCols:_col0 - Select Operator [SEL_388] (rows=4602 width=585) + Select Operator [SEL_386] (rows=4602 width=4) Output:["_col0"] - Filter Operator [FIL_387] (rows=4602 width=585) + Filter Operator [FIL_385] (rows=4602 width=4) predicate:wp_web_page_sk is not null - TableScan [TS_83] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + TableScan [TS_83] (rows=4602 width=4) + default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_89] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=158402938 width=135) - Conds:RS_398._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_304] (rows=15991254 width=227) + Conds:RS_384._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_337] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_330] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_398] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_384] PartitionCols:_col0 - Select Operator [SEL_397] (rows=144002668 width=135) + Select Operator [SEL_383] (rows=143931136 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_396] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN 
DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_90_web_page_wp_web_page_sk_min) AND DynamicValue(RS_90_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_90_web_page_wp_web_page_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_77] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] + Filter Operator [FIL_382] (rows=143931136 width=231) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_77] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_386] - Group By Operator [GBY_385] (rows=1 width=12) + BROADCAST [RS_381] + Group By Operator [GBY_380] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_345] Group By Operator [GBY_342] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_338] (rows=8116 width=1119) + Select Operator [SEL_338] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_330] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_395] - Group By Operator [GBY_394] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_393] - Group By Operator [GBY_392] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_390] (rows=4602 width=585) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_388] <-Reducer 27 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_405] + FORWARD [RS_395] PartitionCols:_col0 - Group By Operator [GBY_404] (rows=8711072 width=92) + Group By Operator [GBY_394] (rows=23 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_114] PartitionCols:_col0 - Group By Operator [GBY_113] (rows=17422145 width=92) + Group By Operator [GBY_113] (rows=23 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_307] (rows=17422145 width=92) - Conds:RS_109._col1=RS_391._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_391] + Merge Join Operator [MERGEJOIN_307] (rows=1458758 width=137) + Conds:RS_109._col1=RS_388._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_388] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_386] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_306] (rows=15838314 width=92) - Conds:RS_403._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_306] (rows=1458758 width=135) + Conds:RS_393._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_339] 
PartitionCols:_col0 Please refer to the previous Select Operator [SEL_330] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_403] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_393] PartitionCols:_col0 - Select Operator [SEL_402] (rows=14398467 width=92) + Select Operator [SEL_392] (rows=13129719 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_401] (rows=14398467 width=92) + Filter Operator [FIL_391] (rows=13129719 width=221) predicate:(wr_returned_date_sk is not null and wr_web_page_sk is not null) - TableScan [TS_97] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] + TableScan [TS_97] (rows=14398467 width=221) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_316] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_315] (rows=1912659936 width=163) + Group By Operator [GBY_315] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_314] (rows=637553312 width=163) + Top N Key Operator [TNK_314] (rows=35 width=435) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_312] (rows=383325119 width=88) + Select Operator [SEL_312] (rows=10 width=437) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_311] (rows=383325119 width=88) - Conds:RS_361._col0=RS_366._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_311] (rows=10 width=452) + Conds:RS_356._col0=RS_361._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_366] + FORWARD [RS_361] PartitionCols:_col0 - Group By Operator [GBY_365] (rows=34842647 
width=77) + Group By Operator [GBY_360] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 - Group By Operator [GBY_36] (rows=69685294 width=77) + Group By Operator [GBY_36] (rows=40 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_301] (rows=69685294 width=77) - Conds:RS_32._col1=RS_352._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_301] (rows=5959021 width=157) + Conds:RS_32._col1=RS_354._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + SHUFFLE [RS_354] PartitionCols:_col0 - Select Operator [SEL_349] (rows=1704 width=1910) + Select Operator [SEL_352] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_348] (rows=1704 width=1910) + Filter Operator [FIL_351] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + TableScan [TS_6] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=63350266 width=77) - Conds:RS_364._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_300] (rows=5959021 width=156) + Conds:RS_359._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_333] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_330] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_359] PartitionCols:_col0 - Select Operator [SEL_363] (rows=57591150 width=77) + Select Operator [SEL_358] (rows=53634860 width=223) Output:["_col0","_col1","_col2","_col3"] - Filter Operator 
[FIL_362] (rows=57591150 width=77) + Filter Operator [FIL_357] (rows=53634860 width=223) predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) - TableScan [TS_20] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] + TableScan [TS_20] (rows=57591150 width=223) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_361] + FORWARD [RS_356] PartitionCols:_col0 - Group By Operator [GBY_360] (rows=348477374 width=88) + Group By Operator [GBY_355] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=320 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_299] (rows=696954748 width=88) - Conds:RS_12._col1=RS_350._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_299] (rows=58365993 width=137) + Conds:RS_12._col1=RS_353._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] + SHUFFLE [RS_353] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_349] + Please refer to the previous Select Operator [SEL_352] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=633595212 width=88) - Conds:RS_359._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_298] (rows=58365993 width=135) + Conds:RS_350._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_331] PartitionCols:_col0 Please refer to the 
previous Select Operator [SEL_330] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_359] + SHUFFLE [RS_350] PartitionCols:_col0 - Select Operator [SEL_358] (rows=575995635 width=88) + Select Operator [SEL_349] (rows=525329897 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_357] (rows=575995635 width=88) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_13_store_s_store_sk_min) AND DynamicValue(RS_13_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_13_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + Filter Operator [FIL_348] (rows=525329897 width=221) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_347] Group By Operator [GBY_346] (rows=1 width=12) @@ -553,18 +540,7 @@ Stage-0 SHUFFLE [RS_343] Group By Operator [GBY_340] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_332] (rows=8116 width=1119) + Select Operator [SEL_332] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_330] - 
<-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_356] - Group By Operator [GBY_355] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] - Group By Operator [GBY_353] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_351] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_349] diff --git a/ql/src/test/results/clientpositive/perf/tez/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out index a8fee3bbe4..853759ff15 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -156,59 +156,59 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_238] - Limit [LIM_237] (rows=100 width=88) + Limit [LIM_237] (rows=100 width=484) Number of rows:100 - Select Operator [SEL_236] (rows=52707204 width=88) + Select Operator [SEL_236] (rows=232433056973 width=483) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_73] - Select Operator [SEL_72] (rows=52707204 width=88) + Select Operator [SEL_72] (rows=232433056973 width=719) Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_71] (rows=52707204 width=88) + Filter Operator [FIL_71] (rows=232433056973 width=701) predicate:CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END - Merge Join Operator [MERGEJOIN_191] (rows=105414409 width=88) + Merge Join Operator [MERGEJOIN_191] (rows=464866113946 width=701) Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 
12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_234] (rows=43558464 width=135) + Select Operator [SEL_234] (rows=50796051 width=235) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_233] (rows=43558464 width=135) + Group By Operator [GBY_233] (rows=50796051 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0, _col1 - Group By Operator [GBY_64] (rows=87116928 width=135) + Group By Operator [GBY_64] (rows=50796051 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col3, _col4 - Merge Join Operator [MERGEJOIN_189] (rows=87116928 width=135) + Merge Join Operator [MERGEJOIN_189] (rows=50796051 width=233) Conds:RS_198._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_193] (rows=36524 width=1119) + Select Operator [SEL_193] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_192] (rows=36524 width=1119) + Filter Operator [FIL_192] (rows=652 width=8) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_0] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_59] (rows=79197206 width=135) + Select Operator [SEL_59] (rows=143274863 width=240) Output:["_col0","_col1","_col2","_col4","_col5","_col6"] - Filter Operator [FIL_58] (rows=79197206 width=135) + Filter Operator [FIL_58] (rows=143274863 width=240) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_188] (rows=158394413 width=135) + 
Merge Join Operator [MERGEJOIN_188] (rows=234359952 width=240) Conds:RS_230._col2, _col3=RS_232._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_230] PartitionCols:_col2, _col3 - Select Operator [SEL_229] (rows=143994918 width=135) + Select Operator [SEL_229] (rows=143274863 width=242) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_228] (rows=143994918 width=135) + Filter Operator [FIL_228] (rows=143274863 width=242) predicate:((cs_item_sk = cs_item_sk) and (cs_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_50] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] + TableScan [TS_50] (rows=287989836 width=242) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized BROADCAST [RS_227] Group By Operator [GBY_226] (rows=1 width=12) @@ -217,36 +217,36 @@ Stage-0 SHUFFLE [RS_205] Group By Operator [GBY_202] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_199] (rows=36524 width=1119) + Select Operator [SEL_199] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_193] <-Map 22 [SIMPLE_EDGE] vectorized SHUFFLE [RS_232] PartitionCols:_col0, _col1 - Select Operator [SEL_231] (rows=28798881 width=106) + Select Operator [SEL_231] (rows=28798881 width=8) 
Output:["_col0","_col1"] - TableScan [TS_53] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] + TableScan [TS_53] (rows=28798881 width=8) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col1 - Filter Operator [FIL_45] (rows=95831279 width=88) + Filter Operator [FIL_45] (rows=4580227799 width=471) predicate:CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END - Merge Join Operator [MERGEJOIN_190] (rows=191662559 width=88) + Merge Join Operator [MERGEJOIN_190] (rows=9160455599 width=471) Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_215] PartitionCols:_col1, _col0 - Select Operator [SEL_214] (rows=174238687 width=88) + Select Operator [SEL_214] (rows=114214965 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_213] (rows=174238687 width=88) + Group By Operator [GBY_213] (rows=114214965 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=348477374 width=88) + Group By Operator [GBY_17] (rows=114214965 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_185] (rows=348477374 width=88) + Merge Join Operator [MERGEJOIN_185] (rows=114214965 width=217) Conds:RS_194._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_194] @@ -255,21 +255,21 @@ Stage-0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col0 - 
Select Operator [SEL_12] (rows=316797606 width=88) + Select Operator [SEL_12] (rows=319876350 width=235) Output:["_col0","_col1","_col2","_col4","_col5","_col6"] - Filter Operator [FIL_11] (rows=316797606 width=88) + Filter Operator [FIL_11] (rows=319876350 width=235) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_184] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_184] (rows=883006376 width=235) Conds:RS_210._col1, _col3=RS_212._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_210] PartitionCols:_col1, _col3 - Select Operator [SEL_209] (rows=575995635 width=88) + Select Operator [SEL_209] (rows=550076554 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_208] (rows=575995635 width=88) + Filter Operator [FIL_208] (rows=550076554 width=233) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] + TableScan [TS_3] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_207] Group By Operator [GBY_206] (rows=1 width=12) @@ -278,29 +278,29 @@ Stage-0 SHUFFLE [RS_203] Group By Operator [GBY_200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] (rows=36524 width=1119) + Select 
Operator [SEL_195] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_193] <-Map 16 [SIMPLE_EDGE] vectorized SHUFFLE [RS_212] PartitionCols:_col0, _col1 - Select Operator [SEL_211] (rows=57591150 width=77) + Select Operator [SEL_211] (rows=57591150 width=8) Output:["_col0","_col1"] - TableScan [TS_6] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + TableScan [TS_6] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_225] PartitionCols:_col1, _col0 - Select Operator [SEL_224] (rows=43560808 width=135) + Select Operator [SEL_224] (rows=40539971 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_223] (rows=43560808 width=135) + Group By Operator [GBY_223] (rows=40539971 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0, _col1 - Group By Operator [GBY_38] (rows=87121617 width=135) + Group By Operator [GBY_38] (rows=40539971 width=239) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_187] (rows=87121617 width=135) + Merge Join Operator [MERGEJOIN_187] (rows=40539971 width=235) Conds:RS_196._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_196] @@ -309,21 +309,21 @@ Stage-0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0 - Select Operator [SEL_33] (rows=79201469 width=135) + Select Operator [SEL_33] (rows=113538342 width=242) Output:["_col0","_col1","_col2","_col4","_col5","_col6"] - Filter Operator [FIL_32] (rows=79201469 width=135) + Filter 
Operator [FIL_32] (rows=113538342 width=242) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_186] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_186] (rows=254679677 width=242) Conds:RS_220._col1, _col3=RS_222._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 17 [SIMPLE_EDGE] vectorized SHUFFLE [RS_220] PartitionCols:_col1, _col3 - Select Operator [SEL_219] (rows=144002668 width=135) + Select Operator [SEL_219] (rows=143966864 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_218] (rows=144002668 width=135) + Filter Operator [FIL_218] (rows=143966864 width=243) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_24] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] + TableScan [TS_24] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_217] Group By Operator [GBY_216] (rows=1 width=12) @@ -332,14 +332,14 @@ Stage-0 SHUFFLE [RS_204] Group By Operator [GBY_201] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=36524 width=1119) + Select Operator [SEL_197] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_193] <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_222] PartitionCols:_col0, _col1 - Select Operator [SEL_221] 
(rows=14398467 width=92) + Select Operator [SEL_221] (rows=14398467 width=8) Output:["_col0","_col1"] - TableScan [TS_27] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number"] + TableScan [TS_27] (rows=14398467 width=8) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/query79.q.out index 768b8c9cb2..f1538371a9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query79.q.out @@ -57,145 +57,121 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 5 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 vectorized - File Output Operator [FS_141] - Limit [LIM_140] (rows=100 width=88) + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=776) Number of rows:100 - Select Operator [SEL_139] (rows=421657640 width=88) 
+ Select Operator [SEL_129] (rows=43530621 width=776) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=421657640 width=88) + Select Operator [SEL_32] (rows=43530621 width=776) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_100] (rows=421657640 width=88) - Conds:RS_103._col0=RS_138._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=43530621 width=501) + Conds:RS_103._col0=RS_128._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=80000000 width=860) + Select Operator [SEL_102] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_101] (rows=80000000 width=860) + Filter Operator [FIL_101] (rows=80000000 width=184) predicate:c_customer_sk is not null - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + TableScan [TS_0] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] PartitionCols:_col1 - Select Operator [SEL_137] (rows=383325119 width=88) + Select Operator [SEL_127] (rows=43530621 width=323) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_136] (rows=383325119 width=88) + Group By Operator [GBY_126] (rows=43530621 width=325) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1, 
_col2, _col3 - Group By Operator [GBY_25] (rows=766650239 width=88) + Group By Operator [GBY_25] (rows=43530621 width=325) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col3, _col5, _col13 - Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88) - Conds:RS_21._col2=RS_125._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col13"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + Merge Join Operator [MERGEJOIN_99] (rows=43530621 width=214) + Conds:RS_21._col2=RS_114._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col13"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] PartitionCols:_col0 - Select Operator [SEL_124] (rows=6000 width=107) + Select Operator [SEL_113] (rows=3055 width=12) Output:["_col0"] - Filter Operator [FIL_123] (rows=6000 width=107) + Filter Operator [FIL_112] (rows=3055 width=12) predicate:(((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) - TableScan [TS_12] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 7 [SIMPLE_EDGE] + TableScan [TS_12] (rows=7200 width=12) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88) - Conds:RS_18._col4=RS_117._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col13"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_98] (rows=102592623 width=283) + Conds:RS_18._col4=RS_125._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col13"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_116] (rows=1704 width=1910) + Select Operator 
[SEL_124] (rows=1704 width=100) Output:["_col0","_col2"] - Filter Operator [FIL_115] (rows=1704 width=1910) + Filter Operator [FIL_123] (rows=1704 width=100) predicate:(s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_number_employees","s_city"] - <-Reducer 6 [SIMPLE_EDGE] + TableScan [TS_9] (rows=1704 width=100) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_number_employees","s_city"] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88) - Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + Merge Join Operator [MERGEJOIN_97] (rows=102592623 width=193) + Conds:RS_122._col0=RS_106._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_108] (rows=36524 width=1119) + Select Operator [SEL_105] (rows=391 width=12) Output:["_col0"] - Filter Operator [FIL_107] (rows=36524 width=1119) + Filter Operator [FIL_104] (rows=391 width=12) predicate:((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_134] (rows=575995635 width=88) + Select Operator [SEL_121] (rows=479121995 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_133] (rows=575995635 
width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_customer_c_customer_sk_min) AND DynamicValue(RS_29_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_22_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_22_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_22_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) + Filter Operator [FIL_120] (rows=479121995 width=237) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_22_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_22_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_22_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not 
null) + TableScan [TS_3] (rows=575995635 width=237) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=36524 width=1119) + Select Operator [SEL_107] (rows=391 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_108] + Please refer to the previous Select Operator [SEL_105] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) + BROADCAST [RS_119] + Group By Operator [GBY_118] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) + SHUFFLE [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_118] (rows=1704 width=1910) + Select Operator [SEL_115] (rows=3055 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_116] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_126] (rows=6000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_124] - <-Reducer 4 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_104] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Please refer to the previous Select Operator [SEL_113] diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out index 6924196be1..8c245509ca 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -247,131 +247,131 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_151] - Limit [LIM_150] (rows=100 width=88) + Limit [LIM_150] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_149] (rows=348477374 width=88) + Select Operator [SEL_149] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_148] - Group By Operator [GBY_147] (rows=348477374 width=88) + Group By Operator [GBY_147] (rows=1 width=200) 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 - Group By Operator [GBY_56] (rows=696954748 width=88) + Group By Operator [GBY_56] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col8 - Top N Key Operator [TNK_84] (rows=696954748 width=88) + Top N Key Operator [TNK_84] (rows=1 width=200) keys:_col8,sort order:+,top n:100 - Merge Join Operator [MERGEJOIN_118] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col8"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=1874 width=1911) + Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) Conds:RS_138.substr(_col0, 1, 2)=RS_141.substr(_col2, 1, 2)(Inner),Output:["_col1","_col2"] <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_141] PartitionCols:substr(_col2, 1, 2) - Select Operator [SEL_140] (rows=1704 width=1910) + Select Operator [SEL_140] (rows=1704 width=181) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_139] (rows=1704 width=1910) + Filter Operator [FIL_139] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) - TableScan [TS_42] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] + TableScan [TS_42] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_138] PartitionCols:substr(_col0, 1, 2) - Select Operator [SEL_137] (rows=1 width=1014) + Select Operator [SEL_137] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_136] (rows=1 width=1014) + Filter Operator [FIL_136] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_135] (rows=6833333 width=1014) + Group By Operator [GBY_135] (rows=3098 width=192) 
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 17 [CONTAINS] vectorized Reduce Output Operator [RS_172] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=13666666 width=1014) + Group By Operator [GBY_171] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_170] (rows=3666666 width=1014) + Group By Operator [GBY_170] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] vectorized SHUFFLE [RS_169] PartitionCols:_col0 - Group By Operator [GBY_168] (rows=7333333 width=1014) + Group By Operator [GBY_168] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_167] (rows=7333333 width=1014) + Select Operator [SEL_167] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_166] (rows=7333333 width=1014) + Filter Operator [FIL_166] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_165] (rows=22000000 width=1014) + Group By Operator [GBY_165] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 - Group By Operator [GBY_24] (rows=44000000 width=1014) + Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Merge Join Operator [MERGEJOIN_116] (rows=44000000 width=1014) + Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) Conds:RS_161._col0=RS_164._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_161] PartitionCols:_col0 - Select Operator [SEL_160] (rows=40000000 width=1014) + Select Operator [SEL_160] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=40000000 width=1014) + Filter Operator [FIL_159] (rows=40000000 width=93) predicate:(ca_address_sk is not null and 
substr(substr(ca_zip, 1, 5), 1, 2) is not null) - TableScan [TS_14] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"] + TableScan [TS_14] (rows=40000000 width=93) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_164] PartitionCols:_col0 - Select Operator [SEL_163] (rows=40000000 width=860) + Select Operator [SEL_163] (rows=26666667 width=89) Output:["_col0"] - Filter Operator [FIL_162] (rows=40000000 width=860) + Filter Operator [FIL_162] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) - TableScan [TS_17] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_current_addr_sk","c_preferred_cust_flag"] + TableScan [TS_17] (rows=80000000 width=89) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] <-Reducer 9 [CONTAINS] vectorized Reduce Output Operator [RS_158] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=13666666 width=1014) + Group By Operator [GBY_157] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_156] (rows=10000000 width=1014) + Group By Operator [GBY_156] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_155] PartitionCols:_col0 - Group By Operator [GBY_154] (rows=20000000 width=1014) + Group By Operator [GBY_154] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_153] (rows=20000000 width=1014) + Select Operator [SEL_153] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_152] (rows=20000000 width=1014) + Filter Operator [FIL_152] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', 
'65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', 
'58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_zip"] + TableScan [TS_6] (rows=40000000 width=89) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] 
(rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) Conds:RS_146._col0=RS_129._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_129] PartitionCols:_col0 - Select Operator [SEL_128] (rows=18262 width=1119) + Select Operator [SEL_128] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_127] (rows=18262 width=1119) + Filter Operator [FIL_127] (rows=130 width=12) predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_145] (rows=575995635 width=88) + Select Operator [SEL_145] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_144] (rows=575995635 width=88) + Filter Operator [FIL_144] (rows=525329897 width=114) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized BROADCAST [RS_143] Group By Operator [GBY_142] 
(rows=1 width=12) @@ -380,7 +380,7 @@ Stage-0 SHUFFLE [RS_92] Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_90] (rows=1874 width=1911) + Select Operator [SEL_90] (rows=1 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_117] <-Reducer 7 [BROADCAST_EDGE] vectorized @@ -391,7 +391,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_132] Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=18262 width=1119) + Select Operator [SEL_130] (rows=130 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_128] diff --git a/ql/src/test/results/clientpositive/perf/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/query80.q.out index fd1b971df4..ff26befbd4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query80.q.out @@ -217,22 +217,22 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE) -Map 36 <- Reducer 19 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE) -Map 41 <- Reducer 25 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 31 (BROADCAST_EDGE) +Map 35 <- Reducer 19 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE) +Map 39 <- Reducer 25 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) +Reducer 14 <- Map 12 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Map 30 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 39 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 12 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 20 <- Map 12 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Map 30 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 44 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 23 <- Map 42 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) @@ -242,12 +242,9 @@ Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 31 <- Map 30 (CUSTOM_SIMPLE_EDGE) Reducer 32 <- Map 30 (CUSTOM_SIMPLE_EDGE) Reducer 33 <- Map 
30 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 36 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) Reducer 4 <- Map 26 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 42 <- Map 41 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 45 <- Map 44 (CUSTOM_SIMPLE_EDGE) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) Reducer 5 <- Map 30 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 34 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 8 (CONTAINS) @@ -258,189 +255,178 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_465] - Limit [LIM_464] (rows=100 width=108) + File Output Operator [FS_460] + Limit [LIM_459] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_463] (rows=1217531358 width=108) + Select Operator [SEL_458] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_462] - Select Operator [SEL_461] (rows=1217531358 width=108) + SHUFFLE [RS_457] + Select Operator [SEL_456] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_460] (rows=1217531358 width=108) + Group By Operator [GBY_455] (rows=38846 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_490] + Reduce Output Operator [RS_480] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_489] (rows=2435062716 width=108) + Group By Operator [GBY_479] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_488] (rows=811687572 width=108) + Top N Key Operator 
[TNK_478] (rows=39721 width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_487] (rows=231905279 width=135) + Select Operator [SEL_477] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_486] (rows=231905279 width=135) + Group By Operator [GBY_476] (rows=38846 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col0 - Group By Operator [GBY_74] (rows=463810558 width=135) + Group By Operator [GBY_74] (rows=46000 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_72] (rows=463810558 width=135) + Select Operator [SEL_72] (rows=8592843 width=305) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_369] (rows=463810558 width=135) - Conds:RS_69._col1=RS_474._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] - <-Map 39 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_474] + Merge Join Operator [MERGEJOIN_369] (rows=8592843 width=305) + Conds:RS_69._col1=RS_475._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_475] PartitionCols:_col0 - Select Operator [SEL_473] (rows=46000 width=460) + Select Operator [SEL_474] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_472] (rows=46000 width=460) + Filter Operator [FIL_473] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null - TableScan [TS_54] (rows=46000 width=460) - default@catalog_page,catalog_page,Tbl:COMPLETE,Col:NONE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] + TableScan [TS_54] (rows=46000 width=104) + default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col1 - 
Merge Join Operator [MERGEJOIN_368] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_368] (rows=8592843 width=208) Conds:RS_66._col3=RS_429._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_429] PartitionCols:_col0 - Select Operator [SEL_426] (rows=1150 width=1179) + Select Operator [SEL_426] (rows=1150 width=89) Output:["_col0"] - Filter Operator [FIL_425] (rows=1150 width=1179) + Filter Operator [FIL_425] (rows=1150 width=89) predicate:((p_channel_tv = 'N') and p_promo_sk is not null) - TableScan [TS_12] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_tv"] + TableScan [TS_12] (rows=2300 width=89) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_367] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_367] (rows=17185686 width=222) Conds:RS_63._col2=RS_413._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized SHUFFLE [RS_413] PartitionCols:_col0 - Select Operator [SEL_410] (rows=154000 width=1436) + Select Operator [SEL_410] (rows=154000 width=115) Output:["_col0"] - Filter Operator [FIL_409] (rows=154000 width=1436) + Filter Operator [FIL_409] (rows=154000 width=115) predicate:((i_current_price > 50) and i_item_sk is not null) - TableScan [TS_9] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price"] + TableScan [TS_9] (rows=462000 width=115) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_366] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_366] (rows=51557056 width=232) 
Conds:RS_60._col0=RS_397._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_397] PartitionCols:_col0 - Select Operator [SEL_394] (rows=8116 width=1119) + Select Operator [SEL_394] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_393] (rows=8116 width=1119) + Filter Operator [FIL_393] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 37 [SIMPLE_EDGE] + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 36 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_365] (rows=316788826 width=135) - Conds:RS_482._col2, _col4=RS_485._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_482] + Merge Join Operator [MERGEJOIN_365] (rows=464045263 width=326) + Conds:RS_469._col2, _col4=RS_472._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_469] PartitionCols:_col2, _col4 - Select Operator [SEL_481] (rows=287989836 width=135) + Select Operator [SEL_468] (rows=283691906 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_480] (rows=287989836 width=135) - predicate:((cs_catalog_page_sk BETWEEN DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_min) AND DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_max) and in_bloom_filter(cs_catalog_page_sk, DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_64_item_i_item_sk_min) AND 
DynamicValue(RS_64_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_64_item_i_item_sk_bloom_filter))) and (cs_promo_sk BETWEEN DynamicValue(RS_67_promotion_p_promo_sk_min) AND DynamicValue(RS_67_promotion_p_promo_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_67_promotion_p_promo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_39] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] + Filter Operator [FIL_467] (rows=283691906 width=243) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_64_item_i_item_sk_min) AND DynamicValue(RS_64_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_64_item_i_item_sk_bloom_filter))) and (cs_promo_sk BETWEEN DynamicValue(RS_67_promotion_p_promo_sk_min) AND DynamicValue(RS_67_promotion_p_promo_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_67_promotion_p_promo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_39] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_467] - 
Group By Operator [GBY_466] (rows=1 width=12) + BROADCAST [RS_462] + Group By Operator [GBY_461] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_405] Group By Operator [GBY_402] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_398] (rows=8116 width=1119) + Select Operator [SEL_398] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_394] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_469] - Group By Operator [GBY_468] (rows=1 width=12) + BROADCAST [RS_464] + Group By Operator [GBY_463] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_421] Group By Operator [GBY_418] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_414] (rows=154000 width=1436) + Select Operator [SEL_414] (rows=154000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_410] <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_471] - Group By Operator [GBY_470] (rows=1 width=12) + BROADCAST [RS_466] + Group By Operator [GBY_465] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_437] Group By Operator [GBY_434] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_430] (rows=1150 width=1179) + Select Operator [SEL_430] 
(rows=1150 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_426] - <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_479] - Group By Operator [GBY_478] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_477] - Group By Operator [GBY_476] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_475] (rows=46000 width=460) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_473] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_485] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_472] PartitionCols:_col0, _col1 - Select Operator [SEL_484] (rows=28798881 width=106) + Select Operator [SEL_471] (rows=28798881 width=227) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_483] (rows=28798881 width=106) + Filter Operator [FIL_470] (rows=28798881 width=227) predicate:cr_item_sk is not null - TableScan [TS_42] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] + TableScan [TS_42] (rows=28798881 width=227) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_515] + Reduce Output Operator [RS_500] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_514] (rows=2435062716 width=108) + Group By Operator [GBY_499] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_513] (rows=811687572 width=108) + Top N Key Operator [TNK_498] (rows=39721 
width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_512] (rows=115958879 width=135) + Select Operator [SEL_497] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_511] (rows=115958879 width=135) + Group By Operator [GBY_496] (rows=53 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_114] (rows=231917759 width=135) + Group By Operator [GBY_114] (rows=318 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_112] (rows=231917759 width=135) + Select Operator [SEL_112] (rows=4714659 width=323) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_374] (rows=231917759 width=135) - Conds:RS_109._col2=RS_499._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_499] + Merge Join Operator [MERGEJOIN_374] (rows=4714659 width=323) + Conds:RS_109._col2=RS_495._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_495] PartitionCols:_col0 - Select Operator [SEL_498] (rows=84 width=1850) + Select Operator [SEL_494] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_497] (rows=84 width=1850) + Filter Operator [FIL_493] (rows=84 width=104) predicate:web_site_sk is not null - TableScan [TS_94] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_site_id"] + TableScan [TS_94] (rows=84 width=104) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_373] (rows=210834322 width=135) + Merge Join Operator 
[MERGEJOIN_373] (rows=4714659 width=227) Conds:RS_106._col3=RS_431._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_431] @@ -449,7 +435,7 @@ Stage-0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_372] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_372] (rows=9429318 width=231) Conds:RS_103._col1=RS_415._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized SHUFFLE [RS_415] @@ -458,112 +444,101 @@ Stage-0 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_371] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_371] (rows=28287952 width=235) Conds:RS_100._col0=RS_399._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_399] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_394] - <-Reducer 42 [SIMPLE_EDGE] + <-Reducer 40 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_370] (rows=158402938 width=135) - Conds:RS_507._col1, _col4=RS_510._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] + Merge Join Operator [MERGEJOIN_370] (rows=254608997 width=363) + Conds:RS_489._col1, _col4=RS_492._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_489] PartitionCols:_col1, _col4 - Select Operator [SEL_506] (rows=144002668 width=135) + Select Operator [SEL_488] (rows=143894769 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_505] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and 
in_bloom_filter(ws_item_sk, DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_promo_sk BETWEEN DynamicValue(RS_107_promotion_p_promo_sk_min) AND DynamicValue(RS_107_promotion_p_promo_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_107_promotion_p_promo_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_110_web_site_web_site_sk_min) AND DynamicValue(RS_110_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_110_web_site_web_site_sk_bloom_filter))) and ws_item_sk is not null and ws_promo_sk is not null and ws_sold_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_79] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] + Filter Operator [FIL_487] (rows=143894769 width=243) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_promo_sk BETWEEN DynamicValue(RS_107_promotion_p_promo_sk_min) AND DynamicValue(RS_107_promotion_p_promo_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_107_promotion_p_promo_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_promo_sk is not null and ws_sold_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_79] (rows=144002668 width=243) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_492] - Group By Operator [GBY_491] (rows=1 width=12) + BROADCAST [RS_482] + Group By Operator [GBY_481] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_406] Group By Operator [GBY_403] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_400] (rows=8116 width=1119) + Select Operator [SEL_400] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_394] <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_494] - Group By Operator [GBY_493] (rows=1 width=12) + BROADCAST [RS_484] + Group By Operator [GBY_483] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_422] Group By Operator [GBY_419] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_416] (rows=154000 width=1436) + Select Operator [SEL_416] (rows=154000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_410] <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_496] - Group By Operator [GBY_495] (rows=1 width=12) + BROADCAST [RS_486] + Group By Operator [GBY_485] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_438] Group By 
Operator [GBY_435] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_432] (rows=1150 width=1179) + Select Operator [SEL_432] (rows=1150 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_426] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_504] - Group By Operator [GBY_503] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_502] - Group By Operator [GBY_501] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_500] (rows=84 width=1850) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_498] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_492] PartitionCols:_col0, _col1 - Select Operator [SEL_509] (rows=14398467 width=92) + Select Operator [SEL_491] (rows=14398467 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_508] (rows=14398467 width=92) + Filter Operator [FIL_490] (rows=14398467 width=221) predicate:wr_item_sk is not null - TableScan [TS_82] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] + TableScan [TS_82] (rows=14398467 width=221) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_459] + Reduce Output Operator [RS_454] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_458] (rows=2435062716 width=108) + Group By Operator [GBY_453] (rows=59581 width=627) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_457] (rows=811687572 width=108) + Top N Key Operator [TNK_452] (rows=39721 width=618) keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_456] (rows=463823414 width=88) + Select Operator [SEL_451] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_455] (rows=463823414 width=88) + Group By Operator [GBY_450] (rows=822 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_35] (rows=927646829 width=88) + Group By Operator [GBY_35] (rows=1704 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_33] (rows=927646829 width=88) + Select Operator [SEL_33] (rows=15038783 width=100) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_364] (rows=927646829 width=88) - Conds:RS_30._col2=RS_443._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + Merge Join Operator [MERGEJOIN_364] (rows=15038783 width=100) + Conds:RS_30._col2=RS_449._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_443] + SHUFFLE [RS_449] PartitionCols:_col0 - Select Operator [SEL_442] (rows=1704 width=1910) + Select Operator [SEL_448] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_441] (rows=1704 width=1910) + Filter Operator [FIL_447] (rows=1704 width=104) predicate:s_store_sk is not null - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] + TableScan [TS_15] (rows=1704 width=104) + 
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_363] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_363] (rows=15038783 width=0) Conds:RS_27._col3=RS_427._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_427] @@ -572,7 +547,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_362] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_362] (rows=30077566 width=57) Conds:RS_24._col1=RS_411._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized SHUFFLE [RS_411] @@ -581,7 +556,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_361] (rows=90232695 width=177) Conds:RS_21._col0=RS_395._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_395] @@ -590,17 +565,17 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88) - Conds:RS_451._col1, _col4=RS_454._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_360] (rows=812149846 width=374) + Conds:RS_443._col1, _col4=RS_446._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_451] + SHUFFLE [RS_443] PartitionCols:_col1, _col4 - Select Operator [SEL_450] (rows=575995635 width=88) + Select Operator [SEL_442] (rows=501693263 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_449] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN 
DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_28_promotion_p_promo_sk_min) AND DynamicValue(RS_28_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_28_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] + Filter Operator [FIL_441] (rows=501693263 width=233) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_28_promotion_p_promo_sk_min) AND DynamicValue(RS_28_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_28_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=233) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized BROADCAST [RS_408] Group By Operator [GBY_407] (rows=1 width=12) @@ -609,7 +584,7 @@ Stage-0 SHUFFLE [RS_404] Group By Operator [GBY_401] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_396] (rows=8116 width=1119) + Select Operator [SEL_396] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_394] <-Reducer 27 [BROADCAST_EDGE] vectorized @@ -620,7 +595,7 @@ Stage-0 SHUFFLE [RS_420] Group By Operator [GBY_417] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_412] (rows=154000 width=1436) + Select Operator [SEL_412] (rows=154000 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_410] <-Reducer 31 [BROADCAST_EDGE] vectorized @@ -631,27 +606,16 @@ Stage-0 SHUFFLE [RS_436] Group By Operator [GBY_433] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_428] (rows=1150 width=1179) + Select Operator [SEL_428] (rows=1150 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_426] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_448] - Group By Operator [GBY_447] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_446] - Group By Operator [GBY_445] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_444] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_442] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_454] + SHUFFLE [RS_446] PartitionCols:_col0, _col1 - Select Operator [SEL_453] (rows=57591150 width=77) + Select Operator [SEL_445] (rows=57591150 width=224) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_452] (rows=57591150 width=77) + Filter Operator [FIL_444] (rows=57591150 width=224) predicate:sr_item_sk is not null - TableScan [TS_3] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] + TableScan [TS_3] (rows=57591150 width=224) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/query81.q.out index 581c22a647..b08ed9e24b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query81.q.out @@ -88,90 +88,90 @@ Stage-0 Stage-1 Reducer 4 vectorized File Output Operator [FS_210] - Select Operator [SEL_209] (rows=100 width=860) + Select Operator [SEL_209] (rows=100 width=1506) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Limit [LIM_208] (rows=100 width=860) + Limit [LIM_208] (rows=100 width=1420) Number of rows:100 - Select Operator [SEL_207] (rows=96800003 width=860) + Select Operator [SEL_207] (rows=1577696 width=1418) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_63] - Select Operator [SEL_62] (rows=96800003 width=860) + Select Operator [SEL_62] (rows=1577696 
width=1418) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_178] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_178] (rows=1577696 width=1418) Conds:RS_59._col0=RS_60._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Select Operator [SEL_55] (rows=8066666 width=1014) + Select Operator [SEL_55] (rows=1609248 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_54] (rows=8066666 width=1014) + Filter Operator [FIL_54] (rows=1609248 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_177] (rows=24200000 width=1014) + Merge Join Operator [MERGEJOIN_177] (rows=4827746 width=227) Conds:RS_201._col1=RS_206._col1(Inner),Output:["_col0","_col2","_col3"] <-Reducer 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_206] PartitionCols:_col1 - Select Operator [SEL_205] (rows=11000000 width=1014) + Select Operator [SEL_205] (rows=12 width=198) Output:["_col0","_col1"] - Group By Operator [GBY_204] (rows=11000000 width=1014) + Group By Operator [GBY_204] (rows=12 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_203] (rows=22000000 width=1014) + Select Operator [SEL_203] (rows=5266632 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_202] (rows=22000000 width=1014) + Group By Operator [GBY_202] (rows=5266632 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=44000000 width=1014) + Group By Operator [GBY_42] (rows=8749496 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Merge Join Operator [MERGEJOIN_176] 
(rows=44000000 width=1014) + Merge Join Operator [MERGEJOIN_176] (rows=8749496 width=194) Conds:RS_38._col2=RS_198._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_196] (rows=40000000 width=1014) + Select Operator [SEL_196] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_195] (rows=40000000 width=1014) + Filter Operator [FIL_195] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + TableScan [TS_12] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_175] (rows=31678769 width=106) + Merge Join Operator [MERGEJOIN_175] (rows=8749496 width=112) Conds:RS_190._col0=RS_194._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_192] (rows=36524 width=1119) + Select Operator [SEL_192] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_191] (rows=36524 width=1119) + Filter Operator [FIL_191] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_190] PartitionCols:_col0 - Select Operator [SEL_188] (rows=28798881 width=106) + Select Operator [SEL_188] (rows=28221532 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_186] (rows=28798881 width=106) + Filter Operator [FIL_186] 
(rows=28221532 width=121) predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) - TableScan [TS_6] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + TableScan [TS_6] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_201] PartitionCols:_col1 - Select Operator [SEL_200] (rows=22000000 width=1014) + Select Operator [SEL_200] (rows=4827746 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_199] (rows=22000000 width=1014) + Group By Operator [GBY_199] (rows=4827746 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=44000000 width=1014) + Group By Operator [GBY_22] (rows=8574602 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Merge Join Operator [MERGEJOIN_174] (rows=44000000 width=1014) + Merge Join Operator [MERGEJOIN_174] (rows=8574602 width=194) Conds:RS_18._col2=RS_197._col0(Inner),Output:["_col1","_col3","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_197] @@ -180,7 +180,7 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_173] (rows=31678769 width=106) + Merge Join Operator [MERGEJOIN_173] (rows=8574602 width=112) Conds:RS_189._col0=RS_193._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_193] @@ -189,32 +189,32 @@ Stage-0 <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_187] (rows=28798881 width=106) + Select Operator [SEL_187] 
(rows=27657410 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_185] (rows=28798881 width=106) + Filter Operator [FIL_185] (rows=27657410 width=121) predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_172] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_172] (rows=1568628 width=1310) Conds:RS_181._col2=RS_184._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_181] PartitionCols:_col2 - Select Operator [SEL_180] (rows=80000000 width=860) + Select Operator [SEL_180] (rows=80000000 width=375) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_179] (rows=80000000 width=860) + Filter Operator [FIL_179] (rows=80000000 width=375) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + TableScan [TS_0] (rows=80000000 width=375) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_184] PartitionCols:_col0 - Select Operator [SEL_183] (rows=20000000 width=1014) + Select Operator [SEL_183] (rows=784314 width=1027) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_182] (rows=20000000 width=1014) + Filter Operator [FIL_182] (rows=784314 width=1027) predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] 
(rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + TableScan [TS_3] (rows=40000000 width=1027) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/query82.q.out index 7291dae466..6bc9666784 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query82.q.out @@ -43,98 +43,104 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 vectorized - File Output Operator [FS_97] - Limit [LIM_96] (rows=100 width=88) + Reducer 5 vectorized + File Output Operator [FS_103] + Limit [LIM_102] (rows=1 width=396) Number of rows:100 - Select Operator [SEL_95] 
(rows=633595212 width=88) + Select Operator [SEL_101] (rows=1 width=396) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_94] - Group By Operator [GBY_93] (rows=633595212 width=88) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] + Group By Operator [GBY_99] (rows=1 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_22] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_21] (rows=1267190424 width=88) + Group By Operator [GBY_23] (rows=2 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_43] (rows=1267190424 width=88) + Top N Key Operator [TNK_43] (rows=2871 width=396) keys:_col2, _col3, _col4,sort order:+++,top n:100 - Merge Join Operator [MERGEJOIN_73] (rows=1267190424 width=88) - Conds:RS_92._col0=RS_76._col0(Inner),RS_76._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] - <-Map 5 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_76] - PartitionCols:_col0 - Select Operator [SEL_75] (rows=51333 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_74] (rows=51333 width=1436) - predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] - <-Reducer 8 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] + Merge Join Operator [MERGEJOIN_79] (rows=2871 width=396) + Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_72] (rows=4593600 width=15) - Conds:RS_84._col0=RS_87._col0(Inner),Output:["_col1"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] + Merge Join Operator [MERGEJOIN_78] 
(rows=463969 width=4) + Conds:RS_90._col0=RS_93._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] PartitionCols:_col0 - Select Operator [SEL_86] (rows=8116 width=1119) + Select Operator [SEL_92] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_85] (rows=8116 width=1119) + Filter Operator [FIL_91] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + TableScan [TS_9] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_90] PartitionCols:_col0 - Select Operator [SEL_83] (rows=4176000 width=15) + Select Operator [SEL_89] (rows=4176000 width=11) Output:["_col0","_col1"] - Filter Operator [FIL_82] (rows=4176000 width=15) + Filter Operator [FIL_88] (rows=4176000 width=11) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) - TableScan [TS_6] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] - PartitionCols:_col0 - Select Operator [SEL_91] (rows=575995635 width=88) - Output:["_col0"] - Filter Operator [FIL_90] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not 
null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_81] - Group By Operator [GBY_80] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_79] - Group By Operator [GBY_78] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_77] (rows=51333 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_75] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_89] - Group By Operator [GBY_88] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_51] - Group By Operator [GBY_50] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] - Select Operator [SEL_49] (rows=4593600 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_72] + TableScan [TS_6] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Reducer 2 [ONE_TO_ONE_EDGE] + FORWARD [RS_19] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_77] (rows=3564040 width=400) + Conds:RS_98._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_82] + PartitionCols:_col0 + Select Operator [SEL_81] (rows=297 width=404) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_80] (rows=297 width=404) + 
predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=403) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_98] + PartitionCols:_col0 + Select Operator [SEL_97] (rows=575995635 width=4) + Output:["_col0"] + Filter Operator [FIL_96] (rows=575995635 width=4) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_20_inventory_inv_item_sk_min) AND DynamicValue(RS_20_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_20_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not null) + TableScan [TS_0] (rows=575995635 width=4) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_49] (rows=463969 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_78] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_87] + Group By Operator [GBY_86] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_85] + Group By Operator 
[GBY_84] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_83] (rows=297 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_81] diff --git a/ql/src/test/results/clientpositive/perf/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/query83.q.out index d5c383f956..a99851f080 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query83.q.out @@ -145,184 +145,190 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 21 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 18 (ONE_TO_ONE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 22 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (ONE_TO_ONE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 20 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 15 (SIMPLE_EDGE), 
Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 21 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_395] - Limit [LIM_394] (rows=100 width=77) + Reducer 7 vectorized + File Output Operator [FS_398] + Limit [LIM_397] (rows=100 width=260) Number of rows:100 - Select Operator [SEL_393] (rows=76653825 width=77) + Select Operator [SEL_396] (rows=130021 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_125] - Select Operator [SEL_124] (rows=76653825 width=77) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=130021 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_360] (rows=76653825 width=77) - Conds:RS_382._col0=RS_387._col0(Inner),RS_382._col0=RS_392._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_387] + Merge Join Operator [MERGEJOIN_363] (rows=130021 width=124) + Conds:RS_123._col0=RS_395._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_395] PartitionCols:_col0 - Group By Operator [GBY_386] (rows=34842647 width=77) + Group By Operator [GBY_394] (rows=130021 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=69685294 width=77) + Group By Operator [GBY_116] (rows=390063 width=108) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_358] (rows=69685294 width=77) - 
Conds:RS_72._col0=RS_73._col0(Inner),Output:["_col2","_col4"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_73] + Merge Join Operator [MERGEJOIN_361] (rows=5752600 width=103) + Conds:RS_112._col0=RS_113._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_113] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_350] (rows=80353 width=1119) - Conds:RS_371._col1=RS_380._col0(Inner),Output:["_col0"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_371] + Merge Join Operator [MERGEJOIN_352] (rows=5070 width=4) + Conds:RS_374._col1=RS_383._col0(Inner),Output:["_col0"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_374] PartitionCols:_col1 - Select Operator [SEL_370] (rows=73049 width=1119) + Select Operator [SEL_373] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_369] (rows=73049 width=1119) + Filter Operator [FIL_372] (rows=73049 width=98) predicate:(d_date is not null and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_380] + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_383] PartitionCols:_col0 - Group By Operator [GBY_379] (rows=40176 width=1119) + Group By Operator [GBY_382] (rows=5070 width=94) Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=80353 width=1119) + Group By Operator [GBY_21] (rows=5070 width=94) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_349] (rows=80353 width=1119) - Conds:RS_374._col1=RS_378._col0(Left Semi),Output:["_col0"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_374] + Merge Join Operator [MERGEJOIN_351] (rows=10141 width=94) + Conds:RS_377._col1=RS_381._col0(Left 
Semi),Output:["_col0"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_377] PartitionCols:_col1 - Select Operator [SEL_373] (rows=73049 width=1119) + Select Operator [SEL_376] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_372] (rows=73049 width=1119) + Filter Operator [FIL_375] (rows=73049 width=98) predicate:(d_date is not null and d_week_seq is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_378] + TableScan [TS_9] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_381] PartitionCols:_col0 - Group By Operator [GBY_377] (rows=73049 width=1119) + Group By Operator [GBY_380] (rows=1826 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_376] (rows=73049 width=1119) + Select Operator [SEL_379] (rows=3652 width=4) Output:["_col0"] - Filter Operator [FIL_375] (rows=73049 width=1119) + Filter Operator [FIL_378] (rows=3652 width=98) predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_72] + TableScan [TS_12] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_112] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_351] (rows=63350266 width=77) - Conds:RS_385._col1=RS_367._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_367] + Merge Join Operator [MERGEJOIN_356] (rows=13749816 width=107) + Conds:RS_393._col1=RS_371._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_371] PartitionCols:_col0 - Select Operator 
[SEL_365] (rows=462000 width=1436) + Select Operator [SEL_368] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_364] (rows=462000 width=1436) + Filter Operator [FIL_367] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_385] + TableScan [TS_3] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_393] PartitionCols:_col1 - Select Operator [SEL_384] (rows=57591150 width=77) + Select Operator [SEL_392] (rows=13749816 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_383] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_40] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] - <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_392] + Filter Operator [FIL_391] (rows=13749816 width=11) + predicate:(wr_item_sk is not null and wr_returned_date_sk is not null) + TableScan [TS_80] (rows=14398467 width=11) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_123] PartitionCols:_col0 - Group By Operator [GBY_391] (rows=8711072 width=92) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_362] (rows=134905 width=116) + Conds:RS_385._col0=RS_390._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_390] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=17422145 width=92) - 
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_359] (rows=17422145 width=92) - Conds:RS_112._col0=RS_113._col0(Inner),Output:["_col2","_col4"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_113] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_350] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_112] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_354] (rows=15838314 width=92) - Conds:RS_390._col1=RS_368._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_368] + Group By Operator [GBY_389] (rows=141711 width=108) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=462000 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_360] (rows=25343167 width=103) + Conds:RS_72._col0=RS_73._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_365] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_390] - PartitionCols:_col1 - Select Operator [SEL_389] (rows=14398467 width=92) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_388] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_returned_date_sk is not null) - TableScan [TS_80] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] - <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_382] - PartitionCols:_col0 - Group By Operator [GBY_381] (rows=17423323 width=106) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_37] + Please refer to the previous Merge Join Operator [MERGEJOIN_352] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_72] + 
PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_353] (rows=55578005 width=107) + Conds:RS_388._col1=RS_370._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_370] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_368] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_388] + PartitionCols:_col1 + Select Operator [SEL_387] (rows=55578005 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_386] (rows=55578005 width=11) + predicate:(sr_item_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_40] (rows=57591150 width=11) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_385] PartitionCols:_col0 - Group By Operator [GBY_36] (rows=34846646 width=106) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_357] (rows=34846646 width=106) - Conds:RS_32._col0=RS_33._col0(Inner),Output:["_col2","_col4"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_350] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_348] (rows=31678769 width=106) - Conds:RS_363._col1=RS_366._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_366] + Group By Operator [GBY_384] (rows=134905 width=108) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Group By Operator [GBY_36] (rows=462000 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_359] (rows=12501392 width=103) + Conds:RS_32._col0=RS_33._col0(Inner),Output:["_col2","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - 
Please refer to the previous Select Operator [SEL_365] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_363] - PartitionCols:_col1 - Select Operator [SEL_362] (rows=28798881 width=106) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_361] (rows=28798881 width=106) - predicate:(cr_item_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_0] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] + Please refer to the previous Merge Join Operator [MERGEJOIN_352] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_350] (rows=28798881 width=107) + Conds:RS_366._col1=RS_369._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_369] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_368] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_366] + PartitionCols:_col1 + Select Operator [SEL_365] (rows=28798881 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_364] (rows=28798881 width=11) + predicate:(cr_item_sk is not null and cr_returned_date_sk is not null) + TableScan [TS_0] (rows=28798881 width=11) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query84.q.out b/ql/src/test/results/clientpositive/perf/tez/query84.q.out index b30736f1c4..d4c0125cfb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query84.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query84.q.out @@ -55,95 +55,101 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 11 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 7 <- Reducer 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 vectorized - File Output Operator [FS_137] - Limit [LIM_136] (rows=100 width=860) + Reducer 4 vectorized + File Output Operator [FS_141] + Limit [LIM_140] (rows=100 width=384) Number of rows:100 - Select Operator [SEL_135] (rows=212960011 width=860) + Select Operator [SEL_139] (rows=255285 width=384) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=212960011 width=860) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_36] + Select Operator [SEL_35] (rows=255285 width=384) Output:["_col1","_col2"] - Merge Join Operator [MERGEJOIN_116] (rows=212960011 width=860) - Conds:RS_119._col0=RS_122._col0(Inner),RS_122._col0=RS_31._col1(Inner),Output:["_col2","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - PartitionCols:_col0 - Select Operator [SEL_118] (rows=57591150 width=77) - Output:["_col0"] - Filter Operator [FIL_117] (rows=57591150 width=77) - predicate:sr_cdemo_sk is not null - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_cdemo_sk"] - <-Map 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - PartitionCols:_col0 - Select Operator [SEL_121] (rows=1861800 width=385) - Output:["_col0"] - Filter Operator [FIL_120] (rows=1861800 width=385) 
- predicate:cd_demo_sk is not null - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_31] + Merge Join Operator [MERGEJOIN_120] (rows=255285 width=280) + Conds:RS_32._col1=RS_33._col1(Inner),Output:["_col2","_col6","_col7"] + <-Reducer 2 [ONE_TO_ONE_EDGE] + FORWARD [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_116] (rows=56363634 width=4) + Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=55577698 width=3) + Output:["_col0"] + Filter Operator [FIL_121] (rows=55577698 width=3) + predicate:sr_cdemo_sk is not null + TableScan [TS_0] (rows=57591150 width=3) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_cdemo_sk"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=1861800 width=4) + Output:["_col0"] + Filter Operator [FIL_124] (rows=1861800 width=4) + predicate:cd_demo_sk is not null + TableScan [TS_3] (rows=1861800 width=4) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_119] (rows=8315 width=280) Conds:RS_25._col2=RS_26._col0(Inner),Output:["_col0","_col1","_col4","_col5"] - <-Reducer 10 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_114] (rows=7920 width=107) - Conds:RS_131._col1=RS_134._col0(Inner),Output:["_col0"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] + Merge Join Operator [MERGEJOIN_118] (rows=721 width=4) + Conds:RS_135._col1=RS_138._col0(Inner),Output:["_col0"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + 
PartitionCols:_col1 + Select Operator [SEL_134] (rows=7200 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_133] (rows=7200 width=8) + predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) + TableScan [TS_12] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_133] (rows=2 width=12) + Select Operator [SEL_137] (rows=2 width=12) Output:["_col0"] - Filter Operator [FIL_132] (rows=2 width=12) + Filter Operator [FIL_136] (rows=2 width=12) predicate:((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287) and ib_income_band_sk is not null) TableScan [TS_15] (rows=20 width=12) - default@income_band,income_band,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk","ib_lower_bound","ib_upper_bound"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - PartitionCols:_col1 - Select Operator [SEL_130] (rows=7200 width=107) - Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=7200 width=107) - predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) - TableScan [TS_12] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Reducer 6 [SIMPLE_EDGE] + default@income_band,income_band,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk","ib_lower_bound","ib_upper_bound"] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_113] (rows=88000001 width=860) - Conds:RS_125._col3=RS_128._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + Merge Join Operator [MERGEJOIN_117] (rows=83148 width=280) + Conds:RS_129._col3=RS_132._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] PartitionCols:_col3 - 
Select Operator [SEL_124] (rows=80000000 width=860) + Select Operator [SEL_128] (rows=74500295 width=291) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_123] (rows=80000000 width=860) + Filter Operator [FIL_127] (rows=74500295 width=291) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) - TableScan [TS_6] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_id","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_name","c_last_name"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + TableScan [TS_6] (rows=80000000 width=291) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_id","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_name","c_last_name"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] PartitionCols:_col0 - Select Operator [SEL_127] (rows=20000000 width=1014) + Select Operator [SEL_131] (rows=44643 width=96) Output:["_col0"] - Filter Operator [FIL_126] (rows=20000000 width=1014) + Filter Operator [FIL_130] (rows=44643 width=97) predicate:((ca_city = 'Hopewell') and ca_address_sk is not null) - TableScan [TS_9] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_city"] + TableScan [TS_9] (rows=40000000 width=97) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index a6e50e4b65..f5800b95ea 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,17 +183,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 11 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 11 <- Reducer 15 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 @@ -201,145 +200,134 @@ Stage-0 limit:-1 Stage-1 Reducer 10 vectorized - File Output Operator [FS_244] - Limit [LIM_243] (rows=100 width=385) + File Output Operator [FS_239] + Limit [LIM_238] (rows=72 width=832) Number of rows:100 - Select Operator [SEL_242] (rows=1023990 width=385) + Select Operator [SEL_237] (rows=72 width=832) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=1023990 width=385) + SHUFFLE [RS_236] + Select Operator [SEL_235] (rows=72 width=832) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=1023990 width=385) + Group By Operator [GBY_234] (rows=72 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator 
[GBY_48] (rows=2047980 width=385) + Group By Operator [GBY_48] (rows=72 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 - Merge Join Operator [MERGEJOIN_206] (rows=2047980 width=385) - Conds:RS_44._col3, _col24, _col25=RS_237._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] + Merge Join Operator [MERGEJOIN_206] (rows=8055 width=100) + Conds:RS_44._col3, _col24, _col25=RS_232._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_236] (rows=1861800 width=385) + Select Operator [SEL_231] (rows=265971 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_235] (rows=1861800 width=385) + Filter Operator [FIL_230] (rows=265971 width=183) predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_21] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + TableScan [TS_21] (rows=1861800 width=183) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3, _col24, _col25 - Filter Operator [FIL_43] (rows=894726 width=1014) + Filter Operator [FIL_43] (rows=8055 width=390) predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_205] (rows=10736730 width=1014) - 
Conds:RS_40._col1=RS_238._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + Merge Join Operator [MERGEJOIN_205] (rows=24166 width=390) + Conds:RS_40._col1=RS_233._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_236] + Please refer to the previous Select Operator [SEL_231] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_204] (rows=9760664 width=1014) - Conds:RS_37._col4=RS_234._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col12","_col13","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + Merge Join Operator [MERGEJOIN_204] (rows=24166 width=211) + Conds:RS_37._col4=RS_229._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col12","_col13","_col22"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_233] (rows=72 width=200) + Select Operator [SEL_228] (rows=72 width=101) Output:["_col0","_col1"] - Filter Operator [FIL_232] (rows=72 width=200) + Filter Operator [FIL_227] (rows=72 width=101) predicate:r_reason_sk is not null - TableScan [TS_15] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + TableScan [TS_15] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_203] (rows=8873331 width=1014) - Conds:RS_34._col8=RS_220._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col12","_col13"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + Merge Join Operator [MERGEJOIN_203] (rows=24166 width=114) + 
Conds:RS_34._col8=RS_212._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col12","_col13"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_219] (rows=36524 width=1119) + Select Operator [SEL_211] (rows=652 width=8) Output:["_col0"] - Filter Operator [FIL_218] (rows=36524 width=1119) + Filter Operator [FIL_210] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_202] (rows=8066665 width=1014) - Conds:RS_31._col10=RS_212._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col12","_col13"] + Merge Join Operator [MERGEJOIN_202] (rows=67680 width=244) + Conds:RS_31._col10=RS_226._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col12","_col13"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_211] (rows=4602 width=585) + Select Operator [SEL_225] (rows=4602 width=4) Output:["_col0"] - Filter Operator [FIL_210] (rows=4602 width=585) + Filter Operator [FIL_224] (rows=4602 width=4) predicate:wp_web_page_sk is not null - TableScan [TS_9] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + TableScan [TS_9] (rows=4602 width=4) + default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col10 - Filter Operator [FIL_30] (rows=7333332 width=1014) + Filter Operator [FIL_30] (rows=67680 width=446) predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 
300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_201] (rows=22000000 width=1014) - Conds:RS_27._col2=RS_231._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col16"] + Merge Join Operator [MERGEJOIN_201] (rows=1150579 width=446) + Conds:RS_27._col2=RS_223._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col16"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] + SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_230] (rows=20000000 width=1014) + Select Operator [SEL_222] (rows=3529412 width=187) Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=20000000 width=1014) + Filter Operator [FIL_221] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + TableScan [TS_6] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_200] (rows=17600325 width=135) - Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_200] (rows=13039884 width=466) + Conds:RS_209._col0, _col5=RS_220._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_209] PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=14398467 width=92) + Select Operator [SEL_208] (rows=11975292 width=237) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=14398467 width=92) + Filter Operator [FIL_207] (rows=11975292 width=237) predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + TableScan [TS_0] (rows=14398467 width=237) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] + SHUFFLE [RS_220] PartitionCols:_col1, _col3 - Select Operator [SEL_227] (rows=16000296 width=135) + Select Operator [SEL_219] (rows=15992347 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_226] (rows=16000296 width=135) - predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_32_web_page_wp_web_page_sk_min) AND DynamicValue(RS_32_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_32_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and 
ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_218] (rows=15992347 width=243) + predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_3] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized BROADCAST [RS_217] Group By Operator [GBY_216] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_215] Group By Operator [GBY_214] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=4602 width=585) + Select Operator [SEL_213] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_211] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] diff --git a/ql/src/test/results/clientpositive/perf/tez/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/query86.q.out index 9ffee6d52c..22d494ba38 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query86.q.out @@ -59,8 +59,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) @@ -73,78 +72,67 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_89] - Limit [LIM_88] (rows=100 width=135) + File Output Operator [FS_84] + Limit [LIM_83] (rows=100 width=490) Number of rows:100 - Select Operator [SEL_87] (rows=261364852 width=135) + Select Operator [SEL_82] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_86] - Select Operator [SEL_85] (rows=261364852 width=135) + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_84] (rows=261364852 width=135) + PTF Operator [PTF_79] (rows=3060 width=302) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS 
LAST","partition by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_83] (rows=261364852 width=135) + Select Operator [SEL_78] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_82] + SHUFFLE [RS_77] PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_81] (rows=261364852 width=135) + Select Operator [SEL_76] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_80] (rows=261364852 width=135) + Group By Operator [GBY_75] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=522729705 width=135) + Group By Operator [GBY_17] (rows=88740 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_15] (rows=174243235 width=135) + Select Operator [SEL_15] (rows=24992810 width=293) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_60] (rows=174243235 width=135) - Conds:RS_12._col1=RS_71._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_60] (rows=24992810 width=293) + Conds:RS_12._col1=RS_74._col0(Inner),Output:["_col2","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_70] (rows=462000 width=1436) + Select Operator [SEL_73] (rows=462000 width=186) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_69] (rows=462000 width=1436) + Filter Operator [FIL_72] (rows=462000 width=186) predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + TableScan [TS_6] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_59] (rows=158402938 width=135) - Conds:RS_79._col0=RS_63._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_59] (rows=24992810 width=115) + Conds:RS_71._col0=RS_63._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_62] (rows=73049 width=1119) + Select Operator [SEL_62] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_61] (rows=73049 width=1119) + Filter Operator [FIL_61] (rows=317 width=8) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] + SHUFFLE [RS_71] PartitionCols:_col0 - Select Operator [SEL_78] (rows=144002668 width=135) + Select Operator [SEL_70] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_77] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_d1_d_date_sk_min) AND DynamicValue(RS_10_d1_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_d1_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - 
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_76] - Group By Operator [GBY_75] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_74] - Group By Operator [GBY_73] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_72] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_70] + Filter Operator [FIL_69] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_d1_d_date_sk_min) AND DynamicValue(RS_10_d1_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_d1_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_68] Group By Operator [GBY_67] (rows=1 width=12) @@ -153,7 +141,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_66] Group By Operator [GBY_65] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_64] (rows=73049 width=1119) + Select Operator [SEL_64] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_62] diff --git a/ql/src/test/results/clientpositive/perf/tez/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/query87.q.out index 986dce81f6..4006cfb876 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query87.q.out @@ -55,21 +55,19 @@ 
POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) -Map 23 <- Reducer 15 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) -Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE) +Map 21 <- Reducer 15 (BROADCAST_EDGE) +Map 22 <- Reducer 19 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) Reducer 17 <- Map 20 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 20 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) @@ -81,214 +79,203 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_280] - Group By Operator [GBY_279] (rows=1 width=24) + File Output Operator [FS_272] + Group By Operator [GBY_271] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_278] - Group By Operator [GBY_277] (rows=1 width=24) + PARTITION_ONLY_SHUFFLE [RS_270] + Group By Operator [GBY_269] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_276] (rows=4537552 width=129) - Filter Operator [FIL_275] (rows=4537552 width=129) + Select Operator [SEL_268] (rows=8062883 width=16) + Filter 
Operator [FIL_267] (rows=8062883 width=16) predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) - Select Operator [SEL_274] (rows=27225312 width=129) + Select Operator [SEL_266] (rows=48377300 width=16) Output:["_col3","_col4"] - Group By Operator [GBY_273] (rows=27225312 width=129) + Group By Operator [GBY_265] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_306] + Reduce Output Operator [RS_296] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_305] (rows=54450625 width=129) + Group By Operator [GBY_295] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_304] (rows=54450625 width=129) + Select Operator [SEL_294] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_303] (rows=43560808 width=135) + Select Operator [SEL_293] (rows=24986582 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_302] (rows=43560808 width=135) + Group By Operator [GBY_292] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_301] (rows=87121617 width=135) + Select Operator [SEL_291] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_300] (rows=87121617 width=135) + Group By Operator [GBY_290] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_80] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_79] (rows=174243235 width=135) + Group By Operator [GBY_79] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_192] 
(rows=174243235 width=135) - Conds:RS_75._col1=RS_248._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_192] (rows=24986582 width=274) + Conds:RS_75._col1=RS_249._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + SHUFFLE [RS_249] PartitionCols:_col0 - Select Operator [SEL_243] (rows=80000000 width=860) + Select Operator [SEL_246] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_242] (rows=80000000 width=860) + Filter Operator [FIL_245] (rows=80000000 width=184) predicate:c_customer_sk is not null - TableScan [TS_6] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name"] + TableScan [TS_6] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_191] (rows=158402938 width=135) - Conds:RS_299._col0=RS_232._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_191] (rows=24986582 width=97) + Conds:RS_289._col0=RS_232._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_232] PartitionCols:_col0 - Select Operator [SEL_227] (rows=73049 width=1119) + Select Operator [SEL_227] (rows=317 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_226] (rows=73049 width=1119) + Filter Operator [FIL_226] (rows=317 width=102) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] 
PartitionCols:_col0 - Select Operator [SEL_298] (rows=144002668 width=135) + Select Operator [SEL_288] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_297] (rows=144002668 width=135) + Filter Operator [FIL_287] (rows=143930993 width=7) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_63] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + TableScan [TS_63] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_296] - Group By Operator [GBY_295] (rows=1 width=12) + BROADCAST [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_239] Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_233] (rows=73049 width=1119) + Select Operator [SEL_233] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_272] + Reduce Output Operator [RS_264] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_271] (rows=54450625 width=129) + Group By Operator [GBY_263] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_270] (rows=54450625 width=129) + Select Operator 
[SEL_262] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_269] (rows=10889817 width=103) + Select Operator [SEL_261] (rows=23390718 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_268] (rows=10889817 width=103) + Group By Operator [GBY_260] (rows=23390718 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_267] (rows=21779634 width=103) + Select Operator [SEL_259] (rows=23390718 width=290) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_266] (rows=21779634 width=103) + Filter Operator [FIL_258] (rows=23390718 width=290) predicate:(((_col3 * 2) = _col4) and (_col3 > 0L)) - Group By Operator [GBY_265] (rows=130677808 width=103) + Group By Operator [GBY_257] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 5 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_294] + Reduce Output Operator [RS_284] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_293] (rows=261355616 width=103) + Group By Operator [GBY_283] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_292] (rows=261355616 width=103) + Select Operator [SEL_282] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_291] (rows=87116929 width=135) + Select Operator [SEL_281] (rows=49146883 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_290] (rows=87116929 width=135) + Group By Operator [GBY_280] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_289] (rows=174233858 width=135) + Select Operator [SEL_279] 
(rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_288] (rows=174233858 width=135) + Group By Operator [GBY_278] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_41] (rows=348467716 width=135) + Group By Operator [GBY_41] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_190] (rows=348467716 width=135) - Conds:RS_37._col1=RS_246._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_190] (rows=49146883 width=274) + Conds:RS_37._col1=RS_248._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_246] + SHUFFLE [RS_248] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_243] + Please refer to the previous Select Operator [SEL_246] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_189] (rows=316788826 width=135) - Conds:RS_287._col0=RS_230._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_189] (rows=49146883 width=97) + Conds:RS_277._col0=RS_230._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_230] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_227] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_287] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_277] PartitionCols:_col0 - Select Operator [SEL_286] (rows=287989836 width=135) + Select Operator [SEL_276] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_285] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_38_customer_c_customer_sk_min) AND DynamicValue(RS_38_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_38_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk 
BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_25] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + Filter Operator [FIL_275] (rows=285117831 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_25] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_282] - Group By Operator [GBY_281] (rows=1 width=12) + BROADCAST [RS_274] + Group By Operator [GBY_273] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_238] Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_231] (rows=73049 width=1119) + Select Operator [SEL_231] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_284] - Group By Operator [GBY_283] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] - Group By Operator [GBY_250] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_247] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_243] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_264] + Reduce Output Operator [RS_256] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_263] (rows=261355616 width=103) + Group By Operator [GBY_255] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_262] (rows=261355616 width=103) + Select Operator [SEL_254] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_261] (rows=174238687 width=88) + Select Operator [SEL_253] (rows=91197425 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_260] (rows=174238687 width=88) + Group By Operator [GBY_252] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_259] (rows=348477374 width=88) + Select Operator [SEL_251] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_258] (rows=348477374 width=88) + Group By Operator [GBY_250] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:_col7, _col6, _col3 - Merge Join Operator [MERGEJOIN_188] (rows=696954748 width=88) - Conds:RS_12._col1=RS_244._col0(Inner),Output:["_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_188] (rows=91197425 width=274) + Conds:RS_12._col1=RS_247._col0(Inner),Output:["_col3","_col6","_col7"] <-Map 20 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_244] + SHUFFLE [RS_247] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_243] + Please refer to the previous Select Operator [SEL_246] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_187] (rows=633595212 width=88) - Conds:RS_257._col0=RS_228._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_187] (rows=91197425 width=96) + Conds:RS_244._col0=RS_228._col0(Inner),Output:["_col1","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_228] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_227] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_256] (rows=575995635 width=88) + Select Operator [SEL_243] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_13_customer_c_customer_sk_min) AND DynamicValue(RS_13_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_13_customer_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + Filter Operator [FIL_242] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=7) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_241] Group By Operator [GBY_240] (rows=1 width=12) @@ -297,18 +284,7 @@ Stage-0 SHUFFLE [RS_237] Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_229] (rows=73049 width=1119) + Select Operator [SEL_229] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_254] - Group By Operator [GBY_253] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] - Group By Operator [GBY_249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_245] (rows=80000000 width=860) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_243] diff --git a/ql/src/test/results/clientpositive/perf/tez/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/query88.q.out index 2d467f80da..28a970755c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query88.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query88.q.out @@ -1,4 +1,10 @@ -Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 
'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product PREHOOK: query: explain select * from @@ -196,715 +202,745 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 45 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 62 <- Reducer 13 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE) -Map 63 <- Reducer 18 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 56 (BROADCAST_EDGE) -Map 64 <- Reducer 23 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 57 (BROADCAST_EDGE) -Map 65 <- Reducer 28 (BROADCAST_EDGE), Reducer 49 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE) -Map 66 <- Reducer 33 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE) -Map 67 <- Reducer 38 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE) -Map 68 <- Reducer 43 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 61 (BROADCAST_EDGE) -Reducer 10 <- Map 44 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 53 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 63 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 15 <- Map 44 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 53 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) 
-Reducer 18 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 64 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 44 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 53 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 65 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 25 <- Map 44 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Map 53 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 66 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 44 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 44 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 53 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 67 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 35 <- Map 44 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 36 <- Map 53 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) -Reducer 37 <- Reducer 36 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 68 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 4 <- Map 53 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Map 44 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) -Reducer 41 <- Map 53 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) -Reducer 42 <- Reducer 41 (CUSTOM_SIMPLE_EDGE) -Reducer 43 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 45 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 46 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 47 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 48 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 49 <- Map 44 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE) +Map 68 <- Reducer 19 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 61 (BROADCAST_EDGE) +Map 69 <- Reducer 24 
(BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE) +Map 70 <- Reducer 29 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE) +Map 71 <- Reducer 34 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 64 (BROADCAST_EDGE) +Map 72 <- Reducer 39 (BROADCAST_EDGE), Reducer 56 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) +Map 73 <- Reducer 44 (BROADCAST_EDGE), Reducer 57 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE) +Map 74 <- Reducer 49 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) +Reducer 10 <- Reducer 38 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 43 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 48 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 68 (SIMPLE_EDGE) +Reducer 16 <- Map 50 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 59 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 20 <- Map 13 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE) +Reducer 21 <- Map 50 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 59 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 13 (SIMPLE_EDGE), Map 70 (SIMPLE_EDGE) +Reducer 26 <- Map 50 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 59 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 50 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 13 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE) +Reducer 31 <- Map 50 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 59 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 
(CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 35 <- Map 13 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) +Reducer 36 <- Map 50 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Map 59 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 59 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 13 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) +Reducer 41 <- Map 50 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) +Reducer 42 <- Map 59 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) +Reducer 43 <- Reducer 42 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 13 (SIMPLE_EDGE), Map 74 (SIMPLE_EDGE) +Reducer 46 <- Map 50 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE) +Reducer 47 <- Map 59 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) +Reducer 48 <- Reducer 47 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 13 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 50 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 51 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 52 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 57 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 58 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE), Reducer 42 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 60 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 61 <- Map 53 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 62 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 51 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 52 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 54 <- Map 50 
(CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 56 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 57 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 58 <- Map 50 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 60 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 61 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 62 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 63 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 65 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 66 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 67 <- Map 59 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_218] - Select Operator [SEL_217] (rows=1 width=65) + Reducer 12 + File Output Operator [FS_230] + Select Operator [SEL_229] (rows=1 width=64) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_587] (rows=1 width=65) - Conds:(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_725] - Group By Operator [GBY_724] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_49] - Group By Operator [GBY_48] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_568] (rows=766650239 width=88) - Conds:RS_44._col2=RS_678._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_678] - PartitionCols:_col0 - Select Operator [SEL_675] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_674] (rows=852 width=1910) - 
predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_567] (rows=696954748 width=88) - Conds:RS_41._col0=RS_642._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_642] - PartitionCols:_col0 - Select Operator [SEL_633] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_625] (rows=14400 width=471) - predicate:((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_6] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_566] (rows=633595212 width=88) - Conds:RS_723._col1=RS_592._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_592] + Merge Join Operator [MERGEJOIN_605] (rows=1 width=64) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_226] + Merge Join Operator [MERGEJOIN_604] (rows=1 width=56) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_223] + Merge Join Operator [MERGEJOIN_603] (rows=1 width=48) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 38 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_787] + Group By Operator [GBY_786] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_153] + Group By Operator [GBY_152] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_592] (rows=1352994 
width=8) + Conds:RS_148._col2=RS_704._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_704] PartitionCols:_col0 - Select Operator [SEL_589] (rows=2000 width=107) + Select Operator [SEL_693] (rows=155 width=91) Output:["_col0"] - Filter Operator [FIL_588] (rows=2000 width=107) - predicate:((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) - TableScan [TS_3] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Map 62 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_723] - PartitionCols:_col1 - Select Operator [SEL_722] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_721] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_39_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_39_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_39_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_26] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_716] - Group By Operator [GBY_715] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_615] - Group By Operator [GBY_607] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_593] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 46 [BROADCAST_EDGE] vectorized - BROADCAST [RS_718] - Group By Operator [GBY_717] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_665] - Group By Operator [GBY_657] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_643] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_633] - <-Reducer 55 [BROADCAST_EDGE] vectorized - BROADCAST [RS_720] - Group By Operator [GBY_719] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_701] - Group By Operator [GBY_693] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_679] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_736] - Group By Operator [GBY_735] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE 
[RS_75] - Group By Operator [GBY_74] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_571] (rows=766650239 width=88) - Conds:RS_70._col2=RS_680._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_680] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_570] (rows=696954748 width=88) - Conds:RS_67._col0=RS_644._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_644] - PartitionCols:_col0 - Select Operator [SEL_634] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_626] (rows=14400 width=471) - predicate:((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_569] (rows=633595212 width=88) - Conds:RS_734._col1=RS_594._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_594] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 63 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_734] - PartitionCols:_col1 - Select Operator [SEL_733] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_732] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_65_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_65_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_65_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_68_time_dim_t_time_sk_min) AND DynamicValue(RS_68_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_68_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_71_store_s_store_sk_min) AND 
DynamicValue(RS_71_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_71_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_52] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_727] - Group By Operator [GBY_726] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_616] - Group By Operator [GBY_608] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_595] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_729] - Group By Operator [GBY_728] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_666] - Group By Operator [GBY_658] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_645] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_634] - <-Reducer 56 [BROADCAST_EDGE] vectorized - BROADCAST [RS_731] - Group By Operator [GBY_730] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_702] - Group By Operator [GBY_694] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_681] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_747] - Group By Operator [GBY_746] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_101] - Group By Operator [GBY_100] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_574] (rows=766650239 width=88) - Conds:RS_96._col2=RS_682._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_682] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_96] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_573] (rows=696954748 width=88) - Conds:RS_93._col0=RS_646._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_646] - PartitionCols:_col0 - Select Operator [SEL_635] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_627] (rows=14400 width=471) - predicate:((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_572] (rows=633595212 width=88) - Conds:RS_745._col1=RS_596._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_596] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_745] - PartitionCols:_col1 - Select Operator [SEL_744] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_743] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN 
DynamicValue(RS_91_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_91_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_91_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_94_time_dim_t_time_sk_min) AND DynamicValue(RS_94_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_94_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_97_store_s_store_sk_min) AND DynamicValue(RS_97_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_97_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_78] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_738] - Group By Operator [GBY_737] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_617] - Group By Operator [GBY_609] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_597] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 48 [BROADCAST_EDGE] vectorized - BROADCAST [RS_740] - Group By Operator [GBY_739] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_667] - Group By Operator [GBY_659] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_647] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_635] - <-Reducer 57 [BROADCAST_EDGE] vectorized - BROADCAST [RS_742] - Group By Operator [GBY_741] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_703] - Group By Operator [GBY_695] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_683] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_758] - Group By Operator [GBY_757] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 26 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_577] (rows=766650239 width=88) - Conds:RS_122._col2=RS_684._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_684] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_122] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_576] (rows=696954748 width=88) - Conds:RS_119._col0=RS_648._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_648] - PartitionCols:_col0 - Select Operator [SEL_636] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_628] (rows=14400 width=471) - predicate:((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_119] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_575] (rows=633595212 
width=88) - Conds:RS_756._col1=RS_598._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_598] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_756] - PartitionCols:_col1 - Select Operator [SEL_755] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_754] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_117_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_117_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_117_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_120_time_dim_t_time_sk_min) AND DynamicValue(RS_120_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_120_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_123_store_s_store_sk_min) AND DynamicValue(RS_123_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_123_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_104] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_749] - Group By Operator [GBY_748] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_618] - Group By Operator [GBY_610] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_599] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 49 [BROADCAST_EDGE] 
vectorized - BROADCAST [RS_751] - Group By Operator [GBY_750] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_668] - Group By Operator [GBY_660] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_649] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_636] - <-Reducer 58 [BROADCAST_EDGE] vectorized - BROADCAST [RS_753] - Group By Operator [GBY_752] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_704] - Group By Operator [GBY_696] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_685] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_769] - Group By Operator [GBY_768] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_153] - Group By Operator [GBY_152] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_580] (rows=766650239 width=88) - Conds:RS_148._col2=RS_686._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_686] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_148] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_579] (rows=696954748 width=88) - 
Conds:RS_145._col0=RS_650._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_650] - PartitionCols:_col0 - Select Operator [SEL_637] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_629] (rows=14400 width=471) - predicate:((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_145] + Filter Operator [FIL_692] (rows=155 width=92) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_9] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_148] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_591] (rows=1842898 width=0) + Conds:RS_145._col0=RS_668._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_668] + PartitionCols:_col0 + Select Operator [SEL_655] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_647] (rows=1515 width=12) + predicate:((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) + TableScan [TS_6] (rows=86400 width=12) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_145] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_590] (rows=56928540 width=4) + Conds:RS_785._col1=RS_618._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_618] + PartitionCols:_col0 + Select Operator [SEL_607] (rows=817 width=12) + Output:["_col0"] + Filter Operator [FIL_606] (rows=817 width=12) + predicate:((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) + TableScan [TS_3] (rows=7200 width=12) + 
default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Map 72 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_785] + PartitionCols:_col1 + Select Operator [SEL_784] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_783] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_143_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_143_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_143_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_146_time_dim_t_time_sk_min) AND DynamicValue(RS_146_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_146_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_149_store_s_store_sk_min) AND DynamicValue(RS_149_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_149_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_130] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 39 [BROADCAST_EDGE] vectorized + BROADCAST [RS_778] + Group By Operator [GBY_777] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_637] + Group By Operator [GBY_629] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_619] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 56 [BROADCAST_EDGE] vectorized + BROADCAST [RS_780] + Group By Operator [GBY_779] 
(rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_687] + Group By Operator [GBY_679] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_669] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_655] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_782] + Group By Operator [GBY_781] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_723] + Group By Operator [GBY_715] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_705] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_220] + Merge Join Operator [MERGEJOIN_602] (rows=1 width=40) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_776] + Group By Operator [GBY_775] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_589] (rows=1352994 width=8) + Conds:RS_122._col2=RS_702._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_702] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_693] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_122] + 
PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_588] (rows=1842898 width=0) + Conds:RS_119._col0=RS_666._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_666] + PartitionCols:_col0 + Select Operator [SEL_654] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_646] (rows=1515 width=12) + predicate:((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_587] (rows=56928540 width=4) + Conds:RS_774._col1=RS_616._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_616] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 71 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_774] + PartitionCols:_col1 + Select Operator [SEL_773] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_772] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_117_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_117_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_117_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_120_time_dim_t_time_sk_min) AND DynamicValue(RS_120_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_120_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_123_store_s_store_sk_min) AND DynamicValue(RS_123_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_123_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_104] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 34 [BROADCAST_EDGE] 
vectorized + BROADCAST [RS_767] + Group By Operator [GBY_766] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_636] + Group By Operator [GBY_628] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_617] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 55 [BROADCAST_EDGE] vectorized + BROADCAST [RS_769] + Group By Operator [GBY_768] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_686] + Group By Operator [GBY_678] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_667] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_654] + <-Reducer 64 [BROADCAST_EDGE] vectorized + BROADCAST [RS_771] + Group By Operator [GBY_770] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_722] + Group By Operator [GBY_714] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_703] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_217] + Merge Join Operator [MERGEJOIN_601] (rows=1 width=32) + 
Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_765] + Group By Operator [GBY_764] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_101] + Group By Operator [GBY_100] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_586] (rows=1352994 width=8) + Conds:RS_96._col2=RS_700._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_700] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_693] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_585] (rows=1842898 width=0) + Conds:RS_93._col0=RS_664._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_664] + PartitionCols:_col0 + Select Operator [SEL_653] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_645] (rows=1515 width=12) + predicate:((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_584] (rows=56928540 width=4) + Conds:RS_763._col1=RS_614._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_614] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 70 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_763] + PartitionCols:_col1 + Select Operator [SEL_762] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_761] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_91_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_91_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_91_household_demographics_hd_demo_sk_bloom_filter))) and 
(ss_sold_time_sk BETWEEN DynamicValue(RS_94_time_dim_t_time_sk_min) AND DynamicValue(RS_94_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_94_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_97_store_s_store_sk_min) AND DynamicValue(RS_97_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_97_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_78] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_756] + Group By Operator [GBY_755] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_635] + Group By Operator [GBY_627] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_615] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 54 [BROADCAST_EDGE] vectorized + BROADCAST [RS_758] + Group By Operator [GBY_757] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_685] + Group By Operator [GBY_677] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_665] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_653] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_760] + Group By Operator [GBY_759] 
(rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_721] + Group By Operator [GBY_713] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_701] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_214] + Merge Join Operator [MERGEJOIN_600] (rows=1 width=24) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_754] + Group By Operator [GBY_753] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_75] + Group By Operator [GBY_74] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_583] (rows=1352994 width=8) + Conds:RS_70._col2=RS_698._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_698] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_693] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_582] (rows=1842898 width=0) + Conds:RS_67._col0=RS_662._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_662] + PartitionCols:_col0 + Select Operator [SEL_652] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_644] (rows=1515 width=12) + predicate:((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_581] (rows=56928540 width=4) + 
Conds:RS_752._col1=RS_612._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_612] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_752] + PartitionCols:_col1 + Select Operator [SEL_751] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_750] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_65_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_65_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_65_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_68_time_dim_t_time_sk_min) AND DynamicValue(RS_68_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_68_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_71_store_s_store_sk_min) AND DynamicValue(RS_71_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_71_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_52] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_745] + Group By Operator [GBY_744] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_634] + Group By Operator [GBY_626] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_613] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 53 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_747] + Group By Operator [GBY_746] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_684] + Group By Operator [GBY_676] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_663] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_652] + <-Reducer 62 [BROADCAST_EDGE] vectorized + BROADCAST [RS_749] + Group By Operator [GBY_748] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_720] + Group By Operator [GBY_712] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_699] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_211] + Merge Join Operator [MERGEJOIN_599] (rows=1 width=16) + Conds:(Inner),Output:["_col0","_col1"] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_743] + Group By Operator [GBY_742] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_49] + Group By Operator [GBY_48] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_580] (rows=1352994 width=8) + Conds:RS_44._col2=RS_696._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_696] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_693] + 
<-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_579] (rows=1842898 width=0) + Conds:RS_41._col0=RS_660._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_660] + PartitionCols:_col0 + Select Operator [SEL_651] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_643] (rows=1515 width=12) + predicate:((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_578] (rows=56928540 width=4) + Conds:RS_741._col1=RS_610._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_610] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 68 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_741] + PartitionCols:_col1 + Select Operator [SEL_740] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_739] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_39_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_39_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_39_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_26] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + 
<-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_734] + Group By Operator [GBY_733] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_633] + Group By Operator [GBY_625] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_611] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_736] + Group By Operator [GBY_735] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_683] + Group By Operator [GBY_675] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_661] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_651] + <-Reducer 61 [BROADCAST_EDGE] vectorized + BROADCAST [RS_738] + Group By Operator [GBY_737] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_719] + Group By Operator [GBY_711] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_697] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_732] + Group By Operator [GBY_731] (rows=1 width=8) + 
Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_23] + Group By Operator [GBY_22] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_577] (rows=1352994 width=8) + Conds:RS_18._col2=RS_694._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_694] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_693] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_576] (rows=1842898 width=0) + Conds:RS_15._col0=RS_658._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_658] + PartitionCols:_col0 + Select Operator [SEL_650] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_642] (rows=1515 width=12) + predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_575] (rows=56928540 width=4) + Conds:RS_730._col1=RS_608._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_608] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_730] + PartitionCols:_col1 + Select Operator [SEL_729] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_728] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_13_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_13_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_13_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, 
DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_641] + Group By Operator [GBY_640] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_632] + Group By Operator [GBY_624] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_609] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 51 [BROADCAST_EDGE] vectorized + BROADCAST [RS_691] + Group By Operator [GBY_690] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_682] + Group By Operator [GBY_674] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_659] (rows=1515 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_650] + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_727] + Group By Operator [GBY_726] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_718] + Group By Operator [GBY_710] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_695] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_798] + Group By Operator [GBY_797] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 42 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_179] + Group By Operator [GBY_178] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_595] (rows=1352994 width=8) + Conds:RS_174._col2=RS_706._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_706] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_578] (rows=633595212 width=88) - Conds:RS_767._col1=RS_600._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_600] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_174] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_594] (rows=1842898 width=0) + Conds:RS_171._col0=RS_670._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_670] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_767] - PartitionCols:_col1 - Select Operator [SEL_766] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_765] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_143_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_143_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_143_household_demographics_hd_demo_sk_bloom_filter))) and 
(ss_sold_time_sk BETWEEN DynamicValue(RS_146_time_dim_t_time_sk_min) AND DynamicValue(RS_146_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_146_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_149_store_s_store_sk_min) AND DynamicValue(RS_149_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_149_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_130] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_760] - Group By Operator [GBY_759] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_619] - Group By Operator [GBY_611] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_601] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 50 [BROADCAST_EDGE] vectorized - BROADCAST [RS_762] - Group By Operator [GBY_761] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_669] - Group By Operator [GBY_661] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_651] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_637] - <-Reducer 59 [BROADCAST_EDGE] vectorized - BROADCAST [RS_764] - Group By Operator [GBY_763] (rows=1 
width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_705] - Group By Operator [GBY_697] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_687] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_780] - Group By Operator [GBY_779] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 36 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_179] - Group By Operator [GBY_178] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_583] (rows=766650239 width=88) - Conds:RS_174._col2=RS_688._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_688] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_174] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_582] (rows=696954748 width=88) - Conds:RS_171._col0=RS_652._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_652] - PartitionCols:_col0 - Select Operator [SEL_638] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_630] (rows=14400 width=471) - predicate:((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_171] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_581] (rows=633595212 width=88) - Conds:RS_778._col1=RS_602._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_602] + Select Operator [SEL_656] (rows=1515 width=12) + Output:["_col0"] + Filter Operator [FIL_648] 
(rows=1515 width=12) + predicate:((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_171] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_778] - PartitionCols:_col1 - Select Operator [SEL_777] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_776] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_169_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_169_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_169_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_172_time_dim_t_time_sk_min) AND DynamicValue(RS_172_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_172_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_175_store_s_store_sk_min) AND DynamicValue(RS_175_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_175_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_156] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_771] - Group By Operator [GBY_770] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_620] - Group By Operator [GBY_612] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_603] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select 
Operator [SEL_589] - <-Reducer 51 [BROADCAST_EDGE] vectorized - BROADCAST [RS_773] - Group By Operator [GBY_772] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_670] - Group By Operator [GBY_662] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_653] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_638] - <-Reducer 60 [BROADCAST_EDGE] vectorized - BROADCAST [RS_775] - Group By Operator [GBY_774] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_706] - Group By Operator [GBY_698] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_689] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_791] - Group By Operator [GBY_790] (rows=1 width=8) + Merge Join Operator [MERGEJOIN_593] (rows=56928540 width=4) + Conds:RS_796._col1=RS_620._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_620] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_607] + <-Map 73 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_796] + PartitionCols:_col1 + Select Operator [SEL_795] (rows=501695814 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_794] (rows=501695814 width=11) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_169_household_demographics_hd_demo_sk_min) AND 
DynamicValue(RS_169_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_169_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_172_time_dim_t_time_sk_min) AND DynamicValue(RS_172_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_172_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_175_store_s_store_sk_min) AND DynamicValue(RS_175_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_175_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + TableScan [TS_156] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 44 [BROADCAST_EDGE] vectorized + BROADCAST [RS_789] + Group By Operator [GBY_788] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_638] + Group By Operator [GBY_630] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_621] (rows=817 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_607] + <-Reducer 57 [BROADCAST_EDGE] vectorized + BROADCAST [RS_791] + Group By Operator [GBY_790] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_688] + Group By Operator [GBY_680] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_671] (rows=1515 width=4) + 
Output:["_col0"] + Please refer to the previous Select Operator [SEL_656] + <-Reducer 66 [BROADCAST_EDGE] vectorized + BROADCAST [RS_793] + Group By Operator [GBY_792] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_724] + Group By Operator [GBY_716] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_707] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 48 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_809] + Group By Operator [GBY_808] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] + <-Reducer 47 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_205] Group By Operator [GBY_204] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_586] (rows=766650239 width=88) - Conds:RS_200._col2=RS_690._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_690] + Merge Join Operator [MERGEJOIN_598] (rows=1352994 width=8) + Conds:RS_200._col2=RS_708._col0(Inner) + <-Map 59 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_708] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 40 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_693] + <-Reducer 46 [SIMPLE_EDGE] SHUFFLE [RS_200] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_585] (rows=696954748 width=88) - Conds:RS_197._col0=RS_654._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_654] + Merge Join Operator [MERGEJOIN_597] (rows=1842898 width=0) + Conds:RS_197._col0=RS_672._col0(Inner),Output:["_col2"] + <-Map 50 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_672] PartitionCols:_col0 - Select Operator [SEL_639] (rows=14400 width=471) + Select Operator [SEL_657] (rows=1515 width=12) Output:["_col0"] - Filter Operator [FIL_631] (rows=14400 width=471) + Filter Operator [FIL_649] (rows=1515 width=12) predicate:((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) Please refer to the previous TableScan [TS_6] - <-Reducer 39 [SIMPLE_EDGE] + <-Reducer 45 [SIMPLE_EDGE] SHUFFLE [RS_197] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_584] (rows=633595212 width=88) - Conds:RS_789._col1=RS_604._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_604] + Merge Join Operator [MERGEJOIN_596] (rows=56928540 width=4) + Conds:RS_807._col1=RS_622._col0(Inner),Output:["_col0","_col2"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_622] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 68 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_789] + Please refer to the previous Select Operator [SEL_607] + <-Map 74 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_807] PartitionCols:_col1 - Select Operator [SEL_788] (rows=575995635 width=88) + Select Operator [SEL_806] (rows=501695814 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_787] (rows=575995635 width=88) + Filter Operator [FIL_805] (rows=501695814 width=11) predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_195_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_195_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_195_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_198_time_dim_t_time_sk_min) AND DynamicValue(RS_198_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_198_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_201_store_s_store_sk_min) AND DynamicValue(RS_201_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, 
DynamicValue(RS_201_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_182] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_782] - Group By Operator [GBY_781] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_621] - Group By Operator [GBY_613] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_605] (rows=2000 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] - <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_784] - Group By Operator [GBY_783] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_671] - Group By Operator [GBY_663] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_655] (rows=14400 width=471) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] - <-Reducer 61 [BROADCAST_EDGE] vectorized - BROADCAST [RS_786] - Group By Operator [GBY_785] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_707] - Group By Operator [GBY_699] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_691] (rows=852 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_714] - Group By Operator [GBY_713] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_23] - Group By Operator [GBY_22] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_565] (rows=766650239 width=88) - Conds:RS_18._col2=RS_676._col0(Inner) - <-Map 53 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_676] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_675] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_564] (rows=696954748 width=88) - Conds:RS_15._col0=RS_640._col0(Inner),Output:["_col2"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_640] - PartitionCols:_col0 - Select Operator [SEL_632] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_624] (rows=14400 width=471) - predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_563] (rows=633595212 width=88) - Conds:RS_712._col1=RS_590._col0(Inner),Output:["_col0","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_590] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_589] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_712] - PartitionCols:_col1 - Select Operator [SEL_711] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_710] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN 
DynamicValue(RS_13_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_13_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_13_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_673] - Group By Operator [GBY_672] (rows=1 width=12) + TableScan [TS_182] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_800] + Group By Operator [GBY_799] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_664] - Group By Operator [GBY_656] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_639] + Group By Operator [GBY_631] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_641] (rows=14400 width=471) + Select Operator [SEL_623] (rows=817 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_632] - <-Reducer 54 [BROADCAST_EDGE] vectorized - BROADCAST 
[RS_709] - Group By Operator [GBY_708] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_607] + <-Reducer 58 [BROADCAST_EDGE] vectorized + BROADCAST [RS_802] + Group By Operator [GBY_801] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_700] - Group By Operator [GBY_692] (rows=1 width=12) + <-Map 50 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_689] + Group By Operator [GBY_681] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_677] (rows=852 width=1910) + Select Operator [SEL_673] (rows=1515 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_675] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_623] - Group By Operator [GBY_622] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_657] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_804] + Group By Operator [GBY_803] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_614] - Group By Operator [GBY_606] (rows=1 width=12) + <-Map 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_725] + Group By Operator [GBY_717] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_591] (rows=2000 width=107) + Select Operator [SEL_709] (rows=155 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_589] + Please refer to the previous Select Operator [SEL_693] diff --git a/ql/src/test/results/clientpositive/perf/tez/query89.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query89.q.out index 61ffe8aa16..0844ca79a8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query89.q.out @@ -65,9 +65,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -81,81 +80,81 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_122] - Limit [LIM_121] (rows=100 width=88) + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=801) Number of rows:100 - Select Operator [SEL_120] (rows=191662559 width=88) + Select Operator [SEL_115] (rows=4804228 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - Select Operator [SEL_118] (rows=191662559 width=88) + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=4804228 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_117] (rows=191662559 width=88) + Filter Operator [FIL_112] (rows=4804228 width=689) predicate:CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END - Select Operator [SEL_116] (rows=383325119 width=88) + Select Operator [SEL_111] (rows=9608456 width=577) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_115] (rows=383325119 width=88) + PTF Operator [PTF_110] (rows=9608456 width=577) Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] - Select Operator [SEL_114] (rows=383325119 width=88) + Select Operator [SEL_109] (rows=9608456 width=577) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] + SHUFFLE [RS_108] PartitionCols:_col2, _col0, _col4, _col5 - Group By Operator [GBY_112] (rows=383325119 width=88) + Group By Operator [GBY_107] (rows=9608456 width=577) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_22] (rows=766650239 width=88) + Group By Operator [GBY_22] (rows=27308180 width=577) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13 - Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] + Merge Join Operator [MERGEJOIN_84] (rows=27308180 width=480) + Conds:RS_18._col2=RS_106._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_102] (rows=1704 width=1910) + Select Operator [SEL_105] (rows=1704 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_101] (rows=1704 width=1910) + Filter Operator [FIL_104] (rows=1704 width=183) predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"] + TableScan [TS_9] (rows=1704 
width=183) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_83] (rows=27308180 width=301) Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=36524 width=1119) + Select Operator [SEL_94] (rows=652 width=12) Output:["_col0","_col2"] - Filter Operator [FIL_93] (rows=36524 width=1119) + Filter Operator [FIL_93] (rows=652 width=12) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_82] (rows=76480702 width=364) + Conds:RS_103._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=462000 width=1436) + Select Operator [SEL_86] (rows=6988 width=286) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_85] (rows=462000 width=1436) + Filter Operator [FIL_85] (rows=6988 width=286) predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 
'Jewelry', 'Men') and (i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] + TableScan [TS_3] (rows=462000 width=286) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_103] PartitionCols:_col1 - Select Operator [SEL_110] (rows=575995635 width=88) + Select Operator [SEL_102] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + Filter Operator [FIL_101] (rows=525329897 width=118) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized BROADCAST [RS_100] Group By Operator [GBY_99] (rows=1 width=12) @@ -164,20 +163,9 @@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=36524 width=1119) + Select Operator [SEL_96] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_92] Group By Operator [GBY_91] (rows=1 width=12) @@ -186,7 +174,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=462000 width=1436) + Select Operator [SEL_88] (rows=6988 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query9.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query9.q.out index e439d4083c..eb1d3a58b4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query9.q.out @@ -157,37 +157,37 @@ Stage-0 Stage-1 Reducer 16 File Output Operator [FS_154] - Select Operator [SEL_153] (rows=36 width=1455) + Select Operator [SEL_153] (rows=2 width=560) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_185] (rows=36 width=1455) + Merge Join Operator [MERGEJOIN_185] (rows=2 width=1160) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_150] - Merge Join Operator [MERGEJOIN_184] (rows=36 width=1334) + Merge Join Operator [MERGEJOIN_184] (rows=2 width=1048) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_147] - Merge Join Operator [MERGEJOIN_183] (rows=36 width=1213) + Merge Join Operator [MERGEJOIN_183] (rows=2 width=936) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_144] - Merge Join Operator [MERGEJOIN_182] (rows=36 width=1204) + Merge Join Operator [MERGEJOIN_182] (rows=2 width=928) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_141] - Merge Join Operator [MERGEJOIN_181] (rows=36 width=1083) + Merge Join Operator [MERGEJOIN_181] (rows=2 width=816) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 11 
[CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_138] - Merge Join Operator [MERGEJOIN_180] (rows=36 width=962) + Merge Join Operator [MERGEJOIN_180] (rows=2 width=704) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_135] - Merge Join Operator [MERGEJOIN_179] (rows=36 width=953) + Merge Join Operator [MERGEJOIN_179] (rows=2 width=696) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_272] - Select Operator [SEL_271] (rows=1 width=120) + Select Operator [SEL_271] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_270] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -195,19 +195,19 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_253] Group By Operator [GBY_248] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_243] (rows=575995635 width=88) + Select Operator [SEL_243] (rows=182855757 width=110) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_238] (rows=575995635 width=88) + Filter Operator [FIL_238] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 41 AND 60 - TableScan [TS_80] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_net_paid_inc_tax"] + TableScan [TS_80] (rows=575995635 width=110) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_net_paid_inc_tax"] <-Reducer 9 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_132] - Merge Join Operator [MERGEJOIN_178] (rows=36 width=832) + Merge Join Operator [MERGEJOIN_178] (rows=2 width=584) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized 
PARTITION_ONLY_SHUFFLE [RS_269] - Select Operator [SEL_268] (rows=1 width=120) + Select Operator [SEL_268] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_267] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -215,15 +215,15 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_230] Group By Operator [GBY_225] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_220] (rows=575995635 width=88) + Select Operator [SEL_220] (rows=182855757 width=110) Output:["ss_ext_list_price"] - Filter Operator [FIL_215] (rows=575995635 width=88) + Filter Operator [FIL_215] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 41 AND 60 - TableScan [TS_73] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_ext_list_price"] + TableScan [TS_73] (rows=575995635 width=110) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity","ss_ext_list_price"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_129] - Merge Join Operator [MERGEJOIN_177] (rows=36 width=711) + Merge Join Operator [MERGEJOIN_177] (rows=2 width=472) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_266] @@ -233,18 +233,18 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_208] Group By Operator [GBY_203] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_198] (rows=575995635 width=88) - Filter Operator [FIL_193] (rows=575995635 width=88) + Select Operator [SEL_198] (rows=182855757 width=3) + Filter Operator [FIL_193] (rows=182855757 width=3) predicate:ss_quantity BETWEEN 41 AND 60 - TableScan [TS_66] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity"] + TableScan [TS_66] (rows=575995635 width=3) + 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_quantity"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_126] - Merge Join Operator [MERGEJOIN_176] (rows=36 width=702) + Merge Join Operator [MERGEJOIN_176] (rows=2 width=464) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_264] - Select Operator [SEL_263] (rows=1 width=120) + Select Operator [SEL_263] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_262] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -252,18 +252,18 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_252] Group By Operator [GBY_247] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_242] (rows=575995635 width=88) + Select Operator [SEL_242] (rows=182855757 width=110) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_237] (rows=575995635 width=88) + Filter Operator [FIL_237] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_80] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_123] - Merge Join Operator [MERGEJOIN_175] (rows=36 width=581) + Merge Join Operator [MERGEJOIN_175] (rows=2 width=352) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5"] <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_261] - Select Operator [SEL_260] (rows=1 width=120) + Select Operator [SEL_260] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_259] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -271,14 +271,14 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_229] Group By Operator [GBY_224] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_219] 
(rows=575995635 width=88) + Select Operator [SEL_219] (rows=182855757 width=110) Output:["ss_ext_list_price"] - Filter Operator [FIL_214] (rows=575995635 width=88) + Filter Operator [FIL_214] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_73] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_120] - Merge Join Operator [MERGEJOIN_174] (rows=36 width=460) + Merge Join Operator [MERGEJOIN_174] (rows=2 width=240) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4"] <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_258] @@ -288,29 +288,29 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_207] Group By Operator [GBY_202] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_197] (rows=575995635 width=88) - Filter Operator [FIL_192] (rows=575995635 width=88) + Select Operator [SEL_197] (rows=182855757 width=3) + Filter Operator [FIL_192] (rows=182855757 width=3) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_66] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_117] - Merge Join Operator [MERGEJOIN_173] (rows=36 width=451) + Merge Join Operator [MERGEJOIN_173] (rows=2 width=232) Conds:(Left Outer),Output:["_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_114] - Merge Join Operator [MERGEJOIN_172] (rows=36 width=330) + Merge Join Operator [MERGEJOIN_172] (rows=2 width=120) Conds:(Left Outer),Output:["_col1","_col2"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_111] - Merge Join Operator [MERGEJOIN_171] (rows=36 width=209) + Merge Join Operator [MERGEJOIN_171] (rows=2 width=8) Conds:(Left Outer),Output:["_col1"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_188] - Select Operator [SEL_187] (rows=36 width=200) - Filter Operator [FIL_186] (rows=36 width=200) + Select Operator [SEL_187] (rows=2 width=4) + Filter Operator [FIL_186] (rows=2 
width=4) predicate:(r_reason_sk = 1) - TableScan [TS_0] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk"] + TableScan [TS_0] (rows=72 width=4) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk"] <-Reducer 20 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_210] Group By Operator [GBY_209] (rows=1 width=8) @@ -319,13 +319,13 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_206] Group By Operator [GBY_201] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_196] (rows=575995635 width=88) - Filter Operator [FIL_191] (rows=575995635 width=88) + Select Operator [SEL_196] (rows=182855757 width=3) + Filter Operator [FIL_191] (rows=182855757 width=3) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_66] <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_233] - Select Operator [SEL_232] (rows=1 width=120) + Select Operator [SEL_232] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_231] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -333,14 +333,14 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_228] Group By Operator [GBY_223] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_218] (rows=575995635 width=88) + Select Operator [SEL_218] (rows=182855757 width=110) Output:["ss_ext_list_price"] - Filter Operator [FIL_213] (rows=575995635 width=88) + Filter Operator [FIL_213] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_73] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_256] - Select Operator [SEL_255] (rows=1 width=120) + Select Operator [SEL_255] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_254] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -348,9 
+348,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_251] Group By Operator [GBY_246] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_241] (rows=575995635 width=88) + Select Operator [SEL_241] (rows=182855757 width=110) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_236] (rows=575995635 width=88) + Filter Operator [FIL_236] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_80] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized @@ -361,13 +361,13 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_204] Group By Operator [GBY_199] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_194] (rows=575995635 width=88) - Filter Operator [FIL_189] (rows=575995635 width=88) + Select Operator [SEL_194] (rows=182855757 width=3) + Filter Operator [FIL_189] (rows=182855757 width=3) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_66] <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_277] - Select Operator [SEL_276] (rows=1 width=120) + Select Operator [SEL_276] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_275] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -375,14 +375,14 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_226] Group By Operator [GBY_221] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_216] (rows=575995635 width=88) + Select Operator [SEL_216] (rows=182855757 width=110) Output:["ss_ext_list_price"] - Filter Operator [FIL_211] (rows=575995635 width=88) + Filter Operator [FIL_211] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_73] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_280] - Select Operator [SEL_279] (rows=1 width=120) + 
Select Operator [SEL_279] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_278] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -390,9 +390,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_249] Group By Operator [GBY_244] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_239] (rows=575995635 width=88) + Select Operator [SEL_239] (rows=182855757 width=110) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_234] (rows=575995635 width=88) + Filter Operator [FIL_234] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_80] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized @@ -403,13 +403,13 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_205] Group By Operator [GBY_200] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_195] (rows=575995635 width=88) - Filter Operator [FIL_190] (rows=575995635 width=88) + Select Operator [SEL_195] (rows=182855757 width=3) + Filter Operator [FIL_190] (rows=182855757 width=3) predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_66] <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_285] - Select Operator [SEL_284] (rows=1 width=120) + Select Operator [SEL_284] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_283] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -417,14 +417,14 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_227] Group By Operator [GBY_222] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_217] (rows=575995635 width=88) + Select Operator [SEL_217] (rows=182855757 width=110) Output:["ss_ext_list_price"] - Filter Operator [FIL_212] (rows=575995635 width=88) + Filter Operator [FIL_212] (rows=182855757 width=110) 
predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_73] <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_288] - Select Operator [SEL_287] (rows=1 width=120) + Select Operator [SEL_287] (rows=1 width=112) Output:["_col0"] Group By Operator [GBY_286] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] @@ -432,9 +432,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_250] Group By Operator [GBY_245] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_240] (rows=575995635 width=88) + Select Operator [SEL_240] (rows=182855757 width=110) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_235] (rows=575995635 width=88) + Filter Operator [FIL_235] (rows=182855757 width=110) predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_80] diff --git a/ql/src/test/results/clientpositive/perf/tez/query90.q.out b/ql/src/test/results/clientpositive/perf/tez/query90.q.out index 65ec9e07b8..653ce1a912 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query90.q.out @@ -79,15 +79,15 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_209] - Limit [LIM_208] (rows=1 width=17) + Limit [LIM_208] (rows=1 width=112) Number of rows:100 - Select Operator [SEL_207] (rows=1 width=17) + Select Operator [SEL_207] (rows=1 width=112) Output:["_col0"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_56] - Select Operator [SEL_55] (rows=1 width=17) + Select Operator [SEL_55] (rows=1 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_152] (rows=1 width=17) + Merge Join Operator [MERGEJOIN_152] (rows=1 width=16) Conds:(Inner),Output:["_col0","_col1"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_206] @@ -97,54 +97,54 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_49] Group By Operator [GBY_48] (rows=1 
width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_151] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_151] (rows=153010 width=8) Conds:RS_44._col1=RS_183._col0(Inner) <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_180] (rows=3600 width=107) + Select Operator [SEL_180] (rows=655 width=8) Output:["_col0"] - Filter Operator [FIL_179] (rows=3600 width=107) + Filter Operator [FIL_179] (rows=655 width=8) predicate:((hd_dep_count = 8) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + TableScan [TS_9] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_150] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_150] (rows=1681936 width=3) Conds:RS_41._col0=RS_171._col0(Inner),Output:["_col1"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_168] (rows=86400 width=471) + Select Operator [SEL_168] (rows=9095 width=8) Output:["_col0"] - Filter Operator [FIL_166] (rows=86400 width=471) + Filter Operator [FIL_166] (rows=9095 width=8) predicate:(t_hour BETWEEN 14 AND 15 and t_time_sk is not null) - TableScan [TS_6] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour"] + TableScan [TS_6] (rows=86400 width=8) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_149] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_149] (rows=15977923 width=7) Conds:RS_204._col2=RS_157._col0(Inner),Output:["_col0","_col1"] <-Map 8 [SIMPLE_EDGE] vectorized 
PARTITION_ONLY_SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_154] (rows=511 width=585) + Select Operator [SEL_154] (rows=511 width=7) Output:["_col0"] - Filter Operator [FIL_153] (rows=511 width=585) + Filter Operator [FIL_153] (rows=511 width=7) predicate:(wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) - TableScan [TS_3] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk","wp_char_count"] + TableScan [TS_3] (rows=4602 width=7) + default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk","wp_char_count"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_204] PartitionCols:_col2 - Select Operator [SEL_203] (rows=144002668 width=135) + Select Operator [SEL_203] (rows=143895111 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_202] (rows=144002668 width=135) + Filter Operator [FIL_202] (rows=143895111 width=11) predicate:((ws_ship_hdemo_sk BETWEEN DynamicValue(RS_45_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_45_household_demographics_hd_demo_sk_max) and in_bloom_filter(ws_ship_hdemo_sk, DynamicValue(RS_45_household_demographics_hd_demo_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_39_web_page_wp_web_page_sk_min) AND DynamicValue(RS_39_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_39_web_page_wp_web_page_sk_bloom_filter))) and ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) - TableScan [TS_26] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] + TableScan [TS_26] (rows=144002668 width=11) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] <-Reducer 14 [BROADCAST_EDGE] vectorized BROADCAST [RS_197] Group By Operator [GBY_196] (rows=1 width=12) @@ -153,7 +153,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_162] Group By Operator [GBY_160] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_158] (rows=511 width=585) + Select Operator [SEL_158] (rows=511 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_154] <-Reducer 17 [BROADCAST_EDGE] vectorized @@ -164,7 +164,7 @@ Stage-0 SHUFFLE [RS_176] Group By Operator [GBY_174] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_172] (rows=86400 width=471) + Select Operator [SEL_172] (rows=9095 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_168] <-Reducer 20 [BROADCAST_EDGE] vectorized @@ -175,7 +175,7 @@ Stage-0 SHUFFLE [RS_188] Group By Operator [GBY_186] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_184] (rows=3600 width=107) + Select Operator [SEL_184] (rows=655 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_180] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized @@ -186,7 +186,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_23] Group By Operator [GBY_22] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_148] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_148] (rows=153010 width=8) Conds:RS_18._col1=RS_181._col0(Inner) <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_181] @@ -195,20 +195,20 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_147] 
(rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_147] (rows=1681936 width=3) Conds:RS_15._col0=RS_169._col0(Inner),Output:["_col1"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_169] PartitionCols:_col0 - Select Operator [SEL_167] (rows=86400 width=471) + Select Operator [SEL_167] (rows=9095 width=8) Output:["_col0"] - Filter Operator [FIL_165] (rows=86400 width=471) + Filter Operator [FIL_165] (rows=9095 width=8) predicate:(t_hour BETWEEN 6 AND 7 and t_time_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_146] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_146] (rows=15977923 width=7) Conds:RS_193._col2=RS_155._col0(Inner),Output:["_col0","_col1"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_155] @@ -217,12 +217,12 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_193] PartitionCols:_col2 - Select Operator [SEL_192] (rows=144002668 width=135) + Select Operator [SEL_192] (rows=143895111 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_191] (rows=144002668 width=135) + Filter Operator [FIL_191] (rows=143895111 width=11) predicate:((ws_ship_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ws_ship_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ws_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ws_sold_time_sk, DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_13_web_page_wp_web_page_sk_min) AND DynamicValue(RS_13_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_13_web_page_wp_web_page_sk_bloom_filter))) and ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null and ws_web_page_sk is not null) 
- TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] + TableScan [TS_0] (rows=144002668 width=11) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_time_sk","ws_ship_hdemo_sk","ws_web_page_sk"] <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_178] Group By Operator [GBY_177] (rows=1 width=12) @@ -231,7 +231,7 @@ Stage-0 SHUFFLE [RS_175] Group By Operator [GBY_173] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_170] (rows=86400 width=471) + Select Operator [SEL_170] (rows=9095 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_167] <-Reducer 19 [BROADCAST_EDGE] vectorized @@ -242,7 +242,7 @@ Stage-0 SHUFFLE [RS_187] Group By Operator [GBY_185] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_182] (rows=3600 width=107) + Select Operator [SEL_182] (rows=655 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_180] <-Reducer 9 [BROADCAST_EDGE] vectorized @@ -253,7 +253,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_161] Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=511 width=585) + Select Operator [SEL_156] (rows=511 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_154] diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index a1e4267b1c..5b4952dbd4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -92,107 +92,107 @@ Stage-0 
Stage-1 Reducer 7 vectorized File Output Operator [FS_170] - Select Operator [SEL_169] (rows=58564004 width=860) + Select Operator [SEL_169] (rows=1 width=406) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_168] - Select Operator [SEL_167] (rows=58564004 width=860) + Select Operator [SEL_167] (rows=1 width=518) Output:["_col0","_col1","_col2","_col4"] - Group By Operator [GBY_166] (rows=58564004 width=860) + Group By Operator [GBY_166] (rows=1 width=585) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_41] (rows=117128008 width=860) + Group By Operator [GBY_41] (rows=1 width=585) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19 - Merge Join Operator [MERGEJOIN_144] (rows=117128008 width=860) + Merge Join Operator [MERGEJOIN_144] (rows=10438 width=473) Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_164] (rows=3600 width=107) + Select Operator [SEL_164] (rows=3600 width=96) Output:["_col0"] - Filter Operator [FIL_163] (rows=3600 width=107) + Filter Operator [FIL_163] (rows=3600 width=96) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) - TableScan [TS_25] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] + TableScan [TS_25] (rows=7200 width=96) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_143] (rows=106480005 
width=860) + Merge Join Operator [MERGEJOIN_143] (rows=20876 width=473) Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=34846646 width=106) + Merge Join Operator [MERGEJOIN_142] (rows=657590 width=312) Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_161] (rows=60 width=2045) + Select Operator [SEL_161] (rows=60 width=298) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_160] (rows=60 width=2045) + Filter Operator [FIL_160] (rows=60 width=298) predicate:cc_call_center_sk is not null - TableScan [TS_15] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] + TableScan [TS_15] (rows=60 width=298) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_141] (rows=31678769 width=106) + Merge Join Operator [MERGEJOIN_141] (rows=657590 width=19) Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_155] (rows=28798881 width=106) + Select Operator [SEL_155] (rows=27658583 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_154] (rows=28798881 width=106) + Filter Operator [FIL_154] (rows=27658583 width=121) predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) - TableScan [TS_9] (rows=28798881 width=106) - 
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] + TableScan [TS_9] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_158] (rows=18262 width=1119) + Select Operator [SEL_158] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_157] (rows=18262 width=1119) + Filter Operator [FIL_157] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=96800003 width=860) + Merge Join Operator [MERGEJOIN_140] (rows=479709 width=183) Conds:RS_31._col3=RS_153._col0(Inner),Output:["_col0","_col2","_col5","_col6"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_152] (rows=20000000 width=1014) + Select Operator [SEL_152] (rows=8000000 width=116) Output:["_col0"] - Filter Operator [FIL_151] (rows=20000000 width=1014) + Filter Operator [FIL_151] (rows=8000000 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] + TableScan [TS_6] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col3 - Merge Join 
Operator [MERGEJOIN_139] (rows=88000001 width=860) + Merge Join Operator [MERGEJOIN_139] (rows=2398543 width=187) Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_146] (rows=80000000 width=860) + Select Operator [SEL_146] (rows=74500295 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_145] (rows=80000000 width=860) + Filter Operator [FIL_145] (rows=74500295 width=15) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] + TableScan [TS_0] (rows=80000000 width=15) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_149] (rows=930900 width=385) + Select Operator [SEL_149] (rows=59105 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_148] (rows=930900 width=385) + Filter Operator [FIL_148] (rows=59105 width=183) predicate:((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + TableScan [TS_3] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] diff --git 
a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index b9a1d95045..50918f0966 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -67,112 +67,155 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 15 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 13 <- Reducer 12 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) +Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 2 (SIMPLE_EDGE) -Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 5 vectorized - File Output Operator [FS_135] - Limit [LIM_134] (rows=1 width=112) + File Output Operator [FS_147] + Limit [LIM_146] (rows=1 width=224) Number of rows:100 - Select Operator [SEL_133] (rows=1 width=112) + Select Operator [SEL_145] (rows=1 width=224) Output:["_col0"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - Select Operator [SEL_131] (rows=1 width=112) + SHUFFLE [RS_144] + Select Operator [SEL_143] (rows=1 width=224) Output:["_col1"] - Group 
By Operator [GBY_130] (rows=1 width=112) + Group By Operator [GBY_142] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_36] Group By Operator [GBY_35] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col2)"] - Select Operator [SEL_34] (rows=58081078 width=135) + Select Operator [SEL_34] (rows=2478 width=112) Output:["_col2"] - Filter Operator [FIL_33] (rows=58081078 width=135) + Filter Operator [FIL_33] (rows=2478 width=112) predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) - Merge Join Operator [MERGEJOIN_107] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_107] (rows=7434 width=112) Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col6"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_30] + PARTITION_ONLY_SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_104] (rows=158402938 width=135) - Conds:RS_126._col0=RS_110._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_104] (rows=15995224 width=115) + Conds:RS_131._col0=RS_110._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_109] (rows=8116 width=1119) + Select Operator [SEL_109] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_108] (rows=8116 width=1119) + Filter Operator [FIL_108] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + SHUFFLE [RS_131] PartitionCols:_col0 - Select Operator [SEL_125] 
(rows=144002668 width=135) + Select Operator [SEL_130] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_124] (rows=144002668 width=135) + Filter Operator [FIL_129] (rows=143966864 width=119) predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) + TableScan [TS_0] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_119] (rows=231000 width=1436) + Select Operator [SEL_123] (rows=669 width=4) Output:["_col0"] - Select Operator [SEL_117] (rows=231000 width=1436) + Select Operator [SEL_121] (rows=669 width=8) Output:["_col0"] - Filter Operator [FIL_116] 
(rows=231000 width=1436) + Filter Operator [FIL_120] (rows=669 width=7) predicate:((i_manufact_id = 269) and i_item_sk is not null) - TableScan [TS_20] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_115] - Group By Operator [GBY_114] (rows=1 width=12) + TableScan [TS_20] (rows=462000 width=7) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_119] + Group By Operator [GBY_118] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_113] - Group By Operator [GBY_112] (rows=1 width=12) + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_111] (rows=8116 width=1119) + Select Operator [SEL_111] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_109] - <-Reducer 7 [ONE_TO_ONE_EDGE] + <-Reducer 11 [ONE_TO_ONE_EDGE] FORWARD [RS_31] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_106] (rows=87121617 width=135) - Conds:RS_129._col0=RS_118._col0(Inner),Output:["_col1","_col2"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + Merge Join Operator [MERGEJOIN_106] (rows=97 width=116) + Conds:RS_141._col0=RS_122._col0(Inner),Output:["_col1","_col2"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_117] - <-Reducer 6 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_129] + Please refer to the previous Select Operator [SEL_121] + <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized + FORWARD 
[RS_141] PartitionCols:_col0 - Select Operator [SEL_128] (rows=79201469 width=135) + Select Operator [SEL_140] (rows=6951 width=116) Output:["_col0","_col1"] - Group By Operator [GBY_127] (rows=79201469 width=135) + Group By Operator [GBY_139] (rows=6951 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=158402938 width=135) + Group By Operator [GBY_16] (rows=55608 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Please refer to the previous Merge Join Operator [MERGEJOIN_104] + Merge Join Operator [MERGEJOIN_105] (rows=15995224 width=115) + Conds:RS_138._col0=RS_112._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_109] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=143966864 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_136] (rows=143966864 width=119) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_item_sk BETWEEN DynamicValue(RS_30_web_sales_ws_item_sk_min) AND DynamicValue(RS_30_web_sales_ws_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_web_sales_ws_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_6] (rows=144002668 width=119) + 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_128] + Please refer to the previous Group By Operator [GBY_126] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_113] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_92] (rows=15995224 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_104] diff --git a/ql/src/test/results/clientpositive/perf/tez/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/query93.q.out index efd196f0e9..2891fccb3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query93.q.out @@ -57,75 +57,75 @@ Stage-0 Stage-1 Reducer 5 vectorized File Output Operator [FS_82] - Limit [LIM_81] (rows=100 width=88) + Limit [LIM_81] (rows=100 width=112) Number of rows:100 - Select Operator [SEL_80] (rows=316797606 
width=88) + Select Operator [SEL_80] (rows=38308 width=112) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_79] - Group By Operator [GBY_78] (rows=316797606 width=88) + Group By Operator [GBY_78] (rows=38308 width=112) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=633595212 width=88) + Group By Operator [GBY_17] (rows=38308 width=112) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_15] (rows=633595212 width=88) + Select Operator [SEL_15] (rows=15586502 width=3) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_64] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_64] (rows=15586502 width=3) Conds:RS_12._col0, _col2=RS_77._col0, _col2(Inner),Output:["_col3","_col7","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_12] PartitionCols:_col0, _col2 - Merge Join Operator [MERGEJOIN_63] (rows=63350266 width=77) + Merge Join Operator [MERGEJOIN_63] (rows=1522298 width=8) Conds:RS_67._col1=RS_70._col0(Inner),Output:["_col0","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_67] PartitionCols:_col1 - Select Operator [SEL_66] (rows=57591150 width=77) + Select Operator [SEL_66] (rows=55574563 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_65] (rows=57591150 width=77) + Filter Operator [FIL_65] (rows=55574563 width=15) predicate:(sr_item_sk is not null and sr_reason_sk is not null and sr_ticket_number is not null) - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] + TableScan [TS_0] (rows=57591150 width=15) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE 
[RS_70] PartitionCols:_col0 - Select Operator [SEL_69] (rows=36 width=200) + Select Operator [SEL_69] (rows=1 width=113) Output:["_col0"] - Filter Operator [FIL_68] (rows=36 width=200) + Filter Operator [FIL_68] (rows=1 width=101) predicate:((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) - TableScan [TS_3] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + TableScan [TS_3] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_77] PartitionCols:_col0, _col2 - Select Operator [SEL_76] (rows=575995635 width=88) + Select Operator [SEL_76] (rows=575995635 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_75] (rows=575995635 width=88) + Filter Operator [FIL_75] (rows=575995635 width=122) predicate:((ss_item_sk BETWEEN DynamicValue(RS_12_store_returns_sr_item_sk_min) AND DynamicValue(RS_12_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_12_store_returns_sr_item_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_12_store_returns_sr_ticket_number_min) AND DynamicValue(RS_12_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_12_store_returns_sr_ticket_number_bloom_filter))) and ss_item_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] + TableScan [TS_6] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] <-Reducer 6 [BROADCAST_EDGE] vectorized BROADCAST [RS_72] Group By Operator [GBY_71] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_55] Group By Operator [GBY_54] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_53] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_53] (rows=1522298 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_63] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_74] Group By Operator [GBY_73] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_60] Group By Operator [GBY_59] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_58] (rows=63350266 width=77) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_58] (rows=1522298 width=8) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_63] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index 5921b32339..ada9f3b86b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -69,16 +69,18 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -88,150 +90,161 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=1 width=344) + File Output Operator [FS_178] + Limit [LIM_177] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_174] (rows=1 width=344) + Select Operator [SEL_176] (rows=1 width=240) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - Select Operator [SEL_172] (rows=1 width=344) + SHUFFLE [RS_175] + 
Select Operator [SEL_174] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_171] (rows=1 width=344) + Group By Operator [GBY_173] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_170] - Group By Operator [GBY_169] (rows=1 width=344) + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_168] (rows=115958879 width=135) + Group By Operator [GBY_170] (rows=2511437 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_73] (rows=115958879 width=135) + Group By Operator [GBY_73] (rows=2511437 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_42] (rows=115958879 width=135) + Select Operator [SEL_42] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_41] (rows=115958879 width=135) + Filter Operator [FIL_41] (rows=5022875 width=229) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_130] (rows=231917759 width=135) - Conds:RS_38._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_167] + Merge Join Operator [MERGEJOIN_130] (rows=10045750 width=229) + Conds:RS_38._col4=RS_169._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_169] PartitionCols:_col0 - Select Operator [SEL_166] (rows=7199233 width=92) + Select Operator [SEL_168] (rows=8007986 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_165] (rows=7199233 width=92) + Group By Operator [GBY_167] (rows=8007986 
width=4) Output:["_col0"],keys:KEY._col0 - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=14398467 width=92) + Group By Operator [GBY_165] (rows=14398467 width=4) Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_162] (rows=14398467 width=92) + Filter Operator [FIL_164] (rows=14398467 width=4) predicate:wr_order_number is not null - TableScan [TS_25] (rows=14398467 width=92) - default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + TableScan [TS_25] (rows=14398467 width=4) + default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_38] PartitionCols:_col4 - Select Operator [SEL_37] (rows=210834322 width=135) + Select Operator [SEL_37] (rows=5022875 width=231) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_129] (rows=210834322 width=135) - Conds:RS_34._col4=RS_161._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] - PartitionCols:_col0 - Group By Operator [GBY_160] (rows=144002668 width=135) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_159] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=144002668 width=135) - predicate:(ws_order_number is not null and ws_warehouse_sk is not null) - TableScan [TS_22] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + Merge Join Operator [MERGEJOIN_129] (rows=5022875 width=235) + Conds:RS_34._col4=RS_163._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_128] (rows=191667562 width=135) + Merge Join Operator 
[MERGEJOIN_128] (rows=5022875 width=231) Conds:RS_18._col2=RS_149._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 14 [SIMPLE_EDGE] vectorized + <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_148] (rows=42 width=1850) + Select Operator [SEL_148] (rows=12 width=91) Output:["_col0"] - Filter Operator [FIL_147] (rows=42 width=1850) + Filter Operator [FIL_147] (rows=12 width=92) predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + TableScan [TS_9] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_127] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_127] (rows=15673790 width=235) Conds:RS_15._col1=RS_141._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized + <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_140] (rows=20000000 width=1014) + Select Operator [SEL_140] (rows=784314 width=90) Output:["_col0"] - Filter Operator [FIL_139] (rows=20000000 width=1014) + Filter Operator [FIL_139] (rows=784314 width=90) predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_126] (rows=158402938 width=135) + Merge Join Operator [MERGEJOIN_126] (rows=15987241 width=239) 
Conds:RS_157._col0=RS_133._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] vectorized + <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_133] PartitionCols:_col0 - Select Operator [SEL_132] (rows=8116 width=1119) + Select Operator [SEL_132] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_131] (rows=8116 width=1119) + Filter Operator [FIL_131] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_156] (rows=144002668 width=135) + Select Operator [SEL_156] (rows=143895019 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_155] (rows=144002668 width=135) + Filter Operator [FIL_155] (rows=143895019 width=243) predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_19_web_site_web_site_sk_min) AND DynamicValue(RS_19_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_19_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] 
(rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized BROADCAST [RS_138] Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_136] Group By Operator [GBY_135] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_134] (rows=8116 width=1119) + Select Operator [SEL_134] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_132] - <-Reducer 13 [BROADCAST_EDGE] vectorized + <-Reducer 14 [BROADCAST_EDGE] vectorized BROADCAST [RS_146] Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_144] Group By Operator [GBY_143] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_142] (rows=20000000 width=1014) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_142] (rows=784314 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_140] - <-Reducer 15 [BROADCAST_EDGE] vectorized + <-Reducer 16 [BROADCAST_EDGE] vectorized BROADCAST [RS_154] Group By Operator [GBY_153] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_152] Group By Operator [GBY_151] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_150] (rows=42 width=1850) + Select Operator [SEL_150] (rows=12 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_148] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Group By Operator [GBY_162] (rows=143966743 width=7) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_161] (rows=143966743 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=143966743 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_34_ws1_ws_order_number_min) AND DynamicValue(RS_34_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_34_ws1_ws_order_number_bloom_filter))) and ws_order_number is not null and ws_warehouse_sk is not null) + TableScan [TS_22] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + 
SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_114] (rows=5022875 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_128] diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out index c093eb330d..a88c534409 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -75,226 +75,256 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) -Map 15 <- Reducer 23 (BROADCAST_EDGE) -Map 21 <- Reducer 23 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 18 <- Reducer 11 (BROADCAST_EDGE) +Map 21 <- Reducer 11 (BROADCAST_EDGE) +Map 22 <- Reducer 10 (BROADCAST_EDGE) +Map 26 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 23 <- Map 22 
(CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 20 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 24 <- Map 27 (SIMPLE_EDGE), Reducer 23 (ONE_TO_ONE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 20 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 25 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_286] - Limit [LIM_285] (rows=1 width=344) + Reducer 9 vectorized + File Output Operator [FS_296] + Limit [LIM_295] (rows=1 width=240) Number of rows:100 - Select Operator [SEL_284] (rows=1 width=344) + Select Operator [SEL_294] (rows=1 width=240) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_283] - Select Operator [SEL_282] (rows=1 width=344) + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + Select Operator [SEL_292] (rows=1 width=240) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_281] (rows=1 width=344) + Group By Operator [GBY_291] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_280] - Group By Operator [GBY_279] (rows=1 width=344) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_290] + Group By Operator [GBY_289] (rows=1 width=232) 
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_278] (rows=421668645 width=135) + Group By Operator [GBY_288] (rows=2511437 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_116] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_115] (rows=421668645 width=135) + Group By Operator [GBY_114] (rows=2511437 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_228] (rows=421668645 width=135) - Conds:RS_58._col3=RS_277._col0(Inner),RS_58._col3=RS_275._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_275] + Merge Join Operator [MERGEJOIN_235] (rows=5022875 width=227) + Conds:RS_61._col3=RS_287._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_61] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_234] (rows=5022875 width=227) + Conds:RS_58._col3=RS_273._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_230] (rows=5022875 width=227) + Conds:RS_55._col2=RS_254._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] + PartitionCols:_col0 + Select Operator [SEL_253] (rows=12 width=91) + Output:["_col0"] + Filter Operator [FIL_252] (rows=12 width=92) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_229] (rows=15673790 width=231) + Conds:RS_52._col1=RS_246._col0(Inner),Output:["_col2","_col3","_col4","_col5"] 
+ <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Select Operator [SEL_245] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_244] (rows=784314 width=90) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_228] (rows=15987241 width=235) + Conds:RS_262._col0=RS_238._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_237] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_236] (rows=8116 width=98) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_262] + PartitionCols:_col0 + Select Operator [SEL_261] (rows=143895019 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_260] (rows=143895019 width=239) + predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_56_web_site_web_site_sk_min) AND DynamicValue(RS_56_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, 
DynamicValue(RS_56_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=239) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_239] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_237] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_251] + Group By Operator [GBY_250] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_247] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_245] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] + Group By Operator [GBY_256] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_255] (rows=12 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_253] + <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_273] + PartitionCols:_col0 + Group By Operator [GBY_272] (rows=14686712 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=144002668 width=4) + Output:["_col0"],keys:_col1 + Select Operator [SEL_22] (rows=1411940834 width=11) + Output:["_col1"] + Filter Operator [FIL_21] (rows=1411940834 width=11) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_231] (rows=1411940834 width=11) + Conds:RS_268._col1=RS_271._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] + PartitionCols:_col1 + Select Operator [SEL_267] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_266] (rows=144002668 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) and ws_order_number is not null) + TableScan [TS_12] (rows=144002668 width=7) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_264] + Group By Operator [GBY_263] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_181] (rows=5022875 width=8) + 
Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_230] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_271] + PartitionCols:_col1 + Select Operator [SEL_270] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_269] (rows=144002668 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) and ws_order_number is not null) + TableScan [TS_15] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_265] + Please refer to the previous Group By Operator [GBY_263] + <-Reducer 25 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_287] PartitionCols:_col0 - Group By Operator [GBY_274] (rows=87121617 width=135) + Group By Operator [GBY_286] (rows=8007986 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0 - Group By Operator [GBY_45] (rows=174243235 width=135) + Group By Operator [GBY_45] (rows=14398467 width=4) Output:["_col0"],keys:_col14 - Merge Join Operator [MERGEJOIN_227] (rows=174243235 width=135) - Conds:RS_41._col0=RS_255._col13(Inner),Output:["_col14"] - <-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_255] + Merge Join Operator [MERGEJOIN_233] (rows=1384229738 width=4) + Conds:RS_41._col0=RS_285._col13(Inner),Output:["_col14"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_285] PartitionCols:_col13 - Select Operator [SEL_254] (rows=14398467 width=92) + Select Operator [SEL_284] (rows=14398467 width=272) Output:["_col13"] - Filter Operator [FIL_253] (rows=14398467 width=92) + Filter Operator [FIL_283] (rows=14398467 width=4) predicate:wr_order_number is not null - TableScan [TS_38] (rows=14398467 width=92) - 
default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 16 [ONE_TO_ONE_EDGE] + TableScan [TS_38] (rows=14398467 width=4) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] + <-Reducer 23 [ONE_TO_ONE_EDGE] FORWARD [RS_41] PartitionCols:_col0 - Select Operator [SEL_37] (rows=158402938 width=135) + Select Operator [SEL_37] (rows=1411940834 width=4) Output:["_col0"] - Filter Operator [FIL_36] (rows=158402938 width=135) + Filter Operator [FIL_36] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_226] (rows=158402938 width=135) - Conds:RS_268._col1=RS_272._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] + Merge Join Operator [MERGEJOIN_232] (rows=1411940834 width=11) + Conds:RS_279._col1=RS_282._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_279] PartitionCols:_col1 - Select Operator [SEL_267] (rows=144002668 width=135) + Select Operator [SEL_278] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_266] (rows=144002668 width=135) - predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and ws_order_number is not null) - TableScan [TS_27] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_259] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14398467)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_258] - Group By Operator [GBY_257] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14398467)"] - Select Operator [SEL_256] (rows=14398467 width=92) + Filter Operator [FIL_277] (rows=144002668 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) and ws_order_number is not null) + TableScan [TS_27] (rows=144002668 width=7) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_275] + Group By Operator [GBY_274] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_202] + Group By Operator [GBY_201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_200] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_254] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_272] + Please refer to the previous Merge Join Operator [MERGEJOIN_234] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] PartitionCols:_col1 - Select Operator [SEL_271] (rows=144002668 width=135) + Select Operator [SEL_281] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_270] (rows=144002668 width=135) - predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and ws_order_number is not null) - TableScan [TS_30] (rows=144002668 width=135) - 
default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Please refer to the previous Group By Operator [GBY_259] - <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_277] - PartitionCols:_col0 - Group By Operator [GBY_276] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Group By Operator [GBY_23] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Select Operator [SEL_22] (rows=158402938 width=135) - Output:["_col1"] - Filter Operator [FIL_21] (rows=158402938 width=135) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_225] (rows=158402938 width=135) - Conds:RS_269._col1=RS_273._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_269] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_267] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_273] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_271] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_224] (rows=191667562 width=135) - Conds:RS_55._col2=RS_247._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] - PartitionCols:_col0 - Select Operator [SEL_246] (rows=42 width=1850) - Output:["_col0"] - Filter Operator [FIL_245] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_223] (rows=174243235 width=135) - Conds:RS_52._col1=RS_239._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] - PartitionCols:_col0 - 
Select Operator [SEL_238] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_237] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_222] (rows=158402938 width=135) - Conds:RS_265._col0=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_229] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_263] (rows=144002668 width=135) - predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and (ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN 
DynamicValue(RS_56_web_site_web_site_sk_min) AND DynamicValue(RS_56_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_56_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_260] - Please refer to the previous Group By Operator [GBY_259] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_232] (rows=8116 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_230] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - Group By Operator [GBY_241] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_240] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_252] - Group By Operator [GBY_251] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_248] (rows=42 width=1850) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_246] + Filter Operator [FIL_280] (rows=144002668 width=7) + predicate:((ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) and ws_order_number is not null) + TableScan [TS_30] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_276] + Please refer to the previous Group By Operator [GBY_274] diff --git a/ql/src/test/results/clientpositive/perf/tez/query96.q.out b/ql/src/test/results/clientpositive/perf/tez/query96.q.out index aacf3017c7..d06ad6a611 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query96.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query96.q.out @@ -57,13 +57,13 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_109] - Limit [LIM_108] (rows=1 width=8) + Limit [LIM_108] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_107] (rows=1 width=8) + Select Operator [SEL_107] (rows=1 width=16) Output:["_col0"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=1 width=8) + Select Operator [SEL_105] (rows=1 width=16) Output:["_col1"] Group By Operator [GBY_104] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] @@ -71,54 +71,54 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_23] Group By Operator [GBY_22] 
(rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_76] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_76] (rows=1084713 width=8) Conds:RS_18._col2=RS_95._col0(Inner) <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=852 width=1910) + Select Operator [SEL_94] (rows=155 width=91) Output:["_col0"] - Filter Operator [FIL_93] (rows=852 width=1910) + Filter Operator [FIL_93] (rows=155 width=92) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + TableScan [TS_9] (rows=1704 width=92) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_75] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_75] (rows=1477476 width=0) Conds:RS_15._col1=RS_87._col0(Inner),Output:["_col2"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=3600 width=107) + Select Operator [SEL_86] (rows=655 width=8) Output:["_col0"] - Filter Operator [FIL_85] (rows=3600 width=107) + Filter Operator [FIL_85] (rows=655 width=8) predicate:((hd_dep_count = 5) and hd_demo_sk is not null) - TableScan [TS_6] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + TableScan [TS_6] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_74] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_74] (rows=16240953 width=0) Conds:RS_103._col0=RS_79._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE 
[RS_79] PartitionCols:_col0 - Select Operator [SEL_78] (rows=14400 width=471) + Select Operator [SEL_78] (rows=1515 width=12) Output:["_col0"] - Filter Operator [FIL_77] (rows=14400 width=471) + Filter Operator [FIL_77] (rows=1515 width=12) predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) - TableScan [TS_3] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + TableScan [TS_3] (rows=86400 width=12) + default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=575995635 width=88) + Select Operator [SEL_102] (rows=501695814 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_101] (rows=575995635 width=88) + Filter Operator [FIL_101] (rows=501695814 width=11) predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_16_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_16_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_16_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_13_time_dim_t_time_sk_min) AND DynamicValue(RS_13_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_13_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 10 [BROADCAST_EDGE] 
vectorized BROADCAST [RS_92] Group By Operator [GBY_91] (rows=1 width=12) @@ -127,7 +127,7 @@ Stage-0 SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=3600 width=107) + Select Operator [SEL_88] (rows=655 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] <-Reducer 12 [BROADCAST_EDGE] vectorized @@ -138,7 +138,7 @@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=852 width=1910) + Select Operator [SEL_96] (rows=155 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -149,7 +149,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_82] Group By Operator [GBY_81] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_80] (rows=14400 width=471) + Select Operator [SEL_80] (rows=1515 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_78] diff --git a/ql/src/test/results/clientpositive/perf/tez/query97.q.out b/ql/src/test/results/clientpositive/perf/tez/query97.q.out index d23b4a3925..21daca3aed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query97.q.out @@ -82,40 +82,40 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_34] Group By Operator [GBY_33] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(_col0)","sum(_col1)","sum(_col2)"] - Select Operator [SEL_31] (rows=348477374 width=88) + Select Operator [SEL_31] (rows=19216436912 width=7) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_69] (rows=348477374 width=88) + Merge 
Join Operator [MERGEJOIN_69] (rows=19216436912 width=7) Conds:RS_86._col0, _col1=RS_93._col0, _col1(Outer),Output:["_col0","_col2"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_86] PartitionCols:_col0, _col1 - Group By Operator [GBY_85] (rows=316797606 width=88) + Group By Operator [GBY_85] (rows=95493908 width=6) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=633595212 width=88) + Group By Operator [GBY_10] (rows=95493908 width=6) Output:["_col0","_col1"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_67] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_67] (rows=95493908 width=6) Conds:RS_84._col0=RS_72._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_71] (rows=73049 width=1119) + Select Operator [SEL_71] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_70] (rows=73049 width=1119) + Filter Operator [FIL_70] (rows=317 width=8) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_84] PartitionCols:_col0 - Select Operator [SEL_83] (rows=575995635 width=88) + Select Operator [SEL_83] (rows=550076554 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_82] (rows=575995635 width=88) + Filter Operator [FIL_82] (rows=550076554 width=11) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + TableScan [TS_0] (rows=575995635 width=11) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_81] Group By Operator [GBY_80] (rows=1 width=12) @@ -124,20 +124,20 @@ Stage-0 SHUFFLE [RS_78] Group By Operator [GBY_76] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_73] (rows=73049 width=1119) + Select Operator [SEL_73] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_71] <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized FORWARD [RS_93] PartitionCols:_col0, _col1 - Group By Operator [GBY_92] (rows=158394413 width=135) + Group By Operator [GBY_92] (rows=49393705 width=7) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=316788826 width=135) + Group By Operator [GBY_24] (rows=49393705 width=7) Output:["_col0","_col1"],keys:_col1, _col2 - Merge Join Operator [MERGEJOIN_68] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_68] (rows=49393705 width=7) Conds:RS_91._col0=RS_74._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_74] @@ -146,12 +146,12 @@ Stage-0 <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_90] (rows=287989836 width=135) + Select Operator [SEL_90] (rows=286549727 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_89] (rows=287989836 width=135) + Filter Operator [FIL_89] (rows=286549727 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_14] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + TableScan [TS_14] (rows=287989836 width=11) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_88] Group By Operator [GBY_87] (rows=1 width=12) @@ -160,7 +160,7 @@ Stage-0 SHUFFLE [RS_79] Group By Operator [GBY_77] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_75] (rows=73049 width=1119) + Select Operator [SEL_75] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_71] diff --git a/ql/src/test/results/clientpositive/perf/tez/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/query98.q.out index d81c1e6a2a..8403d20e70 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query98.q.out @@ -86,60 +86,60 @@ Stage-0 Stage-1 Reducer 6 vectorized File Output Operator [FS_84] - Select Operator [SEL_83] (rows=348477374 width=88) + Select Operator [SEL_83] (rows=138600 width=701) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_82] - Select Operator [SEL_81] (rows=348477374 width=88) + Select Operator [SEL_81] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_80] (rows=348477374 width=88) + PTF Operator [PTF_80] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_79] (rows=348477374 width=88) + Select Operator [SEL_79] (rows=138600 width=689) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_78] PartitionCols:_col1 - Group By Operator [GBY_77] (rows=348477374 width=88) + Group By Operator [GBY_77] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=696954748 width=88) + Group By Operator [GBY_16] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col10, _col9, _col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_57] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_57] (rows=18334631 width=577) Conds:RS_12._col1=RS_68._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_68] PartitionCols:_col0 - Select Operator [SEL_67] (rows=462000 width=1436) + Select Operator [SEL_67] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_66] (rows=462000 width=1436) + Filter Operator [FIL_66] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] + TableScan [TS_6] (rows=462000 width=581) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_56] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_56] (rows=61115434 width=70) Conds:RS_76._col0=RS_60._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized 
PARTITION_ONLY_SHUFFLE [RS_60] PartitionCols:_col0 - Select Operator [SEL_59] (rows=8116 width=1119) + Select Operator [SEL_59] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_58] (rows=8116 width=1119) + Filter Operator [FIL_58] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_76] PartitionCols:_col0 - Select Operator [SEL_75] (rows=575995635 width=88) + Select Operator [SEL_75] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_74] (rows=575995635 width=88) + Filter Operator [FIL_74] (rows=550076554 width=114) predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized BROADCAST [RS_73] Group By Operator [GBY_72] (rows=1 width=12) @@ -148,7 +148,7 @@ Stage-0 SHUFFLE [RS_71] Group By Operator [GBY_70] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_69] (rows=462000 width=1436) + Select Operator [SEL_69] (rows=138600 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_67] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -159,7 +159,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_63] Group By Operator [GBY_62] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_61] (rows=8116 width=1119) + Select Operator [SEL_61] (rows=8116 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_59] diff --git a/ql/src/test/results/clientpositive/perf/tez/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out index 341d61e7c4..745c5a9c4b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -81,14 +81,12 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -98,121 +96,99 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_142] - Limit [LIM_141] (rows=100 width=135) + File Output Operator [FS_132] + Limit [LIM_131] (rows=100 width=590) Number of rows:100 - Select Operator [SEL_140] (rows=210822976 width=135) + Select Operator [SEL_130] (rows=3869553 width=590) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - Select Operator [SEL_138] (rows=210822976 width=135) + SHUFFLE [RS_129] + Select Operator [SEL_128] (rows=3869553 width=590) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_137] (rows=210822976 width=135) + Group By Operator [GBY_127] (rows=3869553 width=406) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_29] (rows=421645953 width=135) + Group By Operator [GBY_29] (rows=7739106 width=406) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_57] (rows=421645953 width=135) + Top N Key Operator [TNK_57] (rows=15478212 width=289) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_27] (rows=421645953 width=135) + Select Operator [SEL_27] (rows=15478212 width=289) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) - Conds:RS_24._col3=RS_128._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + Merge Join Operator [MERGEJOIN_101] (rows=15478212 width=289) + Conds:RS_24._col3=RS_112._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] PartitionCols:_col0 - Select Operator [SEL_127] (rows=1 width=0) + Select Operator [SEL_111] (rows=1 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_126] (rows=1 width=0) + Filter Operator [FIL_110] (rows=1 width=88) predicate:sm_ship_mode_sk is not null - TableScan [TS_12] (rows=1 width=0) - default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_type"] + TableScan [TS_12] (rows=1 width=88) + default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_type"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_100] (rows=383314495 width=135) - Conds:RS_21._col4=RS_120._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + Merge Join Operator [MERGEJOIN_100] (rows=46434637 width=209) + Conds:RS_21._col4=RS_126._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_119] 
(rows=27 width=1029) + Select Operator [SEL_125] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=27 width=1029) + Filter Operator [FIL_124] (rows=27 width=104) predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + TableScan [TS_9] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_99] (rows=348467716 width=135) - Conds:RS_18._col2=RS_112._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] + Merge Join Operator [MERGEJOIN_99] (rows=46434637 width=113) + Conds:RS_18._col2=RS_123._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_111] (rows=60 width=2045) + Select Operator [SEL_122] (rows=60 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_110] (rows=60 width=2045) + Filter Operator [FIL_121] (rows=60 width=102) predicate:cc_call_center_sk is not null - TableScan [TS_6] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] + TableScan [TS_6] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_98] (rows=316788826 width=135) - Conds:RS_136._col1=RS_104._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_98] (rows=46434637 width=19) + Conds:RS_120._col1=RS_104._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_103] (rows=73049 
width=1119) + Select Operator [SEL_103] (rows=317 width=8) Output:["_col0"] - Filter Operator [FIL_102] (rows=73049 width=1119) + Filter Operator [FIL_102] (rows=317 width=8) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_120] PartitionCols:_col1 - Select Operator [SEL_135] (rows=287989836 width=135) + Select Operator [SEL_119] (rows=282273729 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_134] (rows=287989836 width=135) - predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_22_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_22_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_22_warehouse_w_warehouse_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized + Filter Operator [FIL_118] (rows=282273729 width=19) + predicate:((cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=19) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] + <-Reducer 13 [BROADCAST_EDGE] vectorized BROADCAST [RS_117] Group By Operator [GBY_116] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_115] Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_113] (rows=60 width=2045) + Select Operator [SEL_113] (rows=1 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_111] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_125] - Group By Operator [GBY_124] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 
[CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_121] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_119] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=1 width=0) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_127] <-Reducer 9 [BROADCAST_EDGE] vectorized BROADCAST [RS_109] Group By Operator [GBY_108] (rows=1 width=12) @@ -221,7 +197,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_107] Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_105] (rows=73049 width=1119) + Select Operator [SEL_105] (rows=317 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_103]