diff --git data/scripts/q_perf_test_init_constraints.sql data/scripts/q_perf_test_init_constraints.sql index 3b3f503ee4..d08f5c6051 100644 --- data/scripts/q_perf_test_init_constraints.sql +++ data/scripts/q_perf_test_init_constraints.sql @@ -762,6 +762,7 @@ alter table web_site change column web_site_id web_site_id string constraint wsi alter table web_page change column wp_web_page_id wp_web_page_id string constraint wpid_nn not null disable novalidate rely; alter table warehouse change column w_warehouse_id w_warehouse_id string constraint wid_nn not null disable novalidate rely; alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely; +alter table customer change column c_customer_id c_customer_id string constraint cid_uq unique disable novalidate rely; alter table customer_address change column ca_address_id ca_address_id string constraint caid_nn not null disable novalidate rely; alter table date_dim change column d_date_id d_date_id string constraint did_nn not null disable novalidate rely; alter table item change column i_item_id i_item_id string constraint itid_nn not null disable novalidate rely; diff --git ql/src/test/queries/clientpositive/perf/cbo_query11.q ql/src/test/queries/clientpositive/perf/cbo_query11.q index 09d9529d71..224a80886b 100644 --- ql/src/test/queries/clientpositive/perf/cbo_query11.q +++ ql/src/test/queries/clientpositive/perf/cbo_query11.q @@ -5,7 +5,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -20,17 +20,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -51,7 +50,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -63,15 +66,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100; -- end query 1 in stream 0 using template query11.tpl diff --git ql/src/test/queries/clientpositive/perf/cbo_query4.q ql/src/test/queries/clientpositive/perf/cbo_query4.q index b47d174331..8d89c06180 100644 --- ql/src/test/queries/clientpositive/perf/cbo_query4.q +++ ql/src/test/queries/clientpositive/perf/cbo_query4.q @@ -74,7 +74,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -92,12 +96,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -105,7 +109,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100; -- end query 1 in stream 0 using template query4.tpl diff --git ql/src/test/queries/clientpositive/perf/cbo_query74.q ql/src/test/queries/clientpositive/perf/cbo_query74.q index 71954c8781..526c347088 100644 --- ql/src/test/queries/clientpositive/perf/cbo_query74.q +++ ql/src/test/queries/clientpositive/perf/cbo_query74.q @@ -6,14 +6,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -23,20 +23,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -49,15 +49,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100; -- end query 1 in stream 0 using template query74.tpl diff --git ql/src/test/queries/clientpositive/perf/query11.q ql/src/test/queries/clientpositive/perf/query11.q index 6017c89790..29d0bdb7fd 100644 --- ql/src/test/queries/clientpositive/perf/query11.q +++ ql/src/test/queries/clientpositive/perf/query11.q @@ -5,7 +5,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -20,17 +20,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -51,7 +50,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -63,15 +66,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100; -- end query 1 in stream 0 using template query11.tpl diff --git ql/src/test/queries/clientpositive/perf/query4.q ql/src/test/queries/clientpositive/perf/query4.q index 631a464028..aed86316ff 100644 --- ql/src/test/queries/clientpositive/perf/query4.q +++ ql/src/test/queries/clientpositive/perf/query4.q @@ -74,7 +74,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -92,12 +96,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -105,7 +109,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100; -- end query 1 in stream 0 using template query4.tpl diff --git ql/src/test/queries/clientpositive/perf/query74.q ql/src/test/queries/clientpositive/perf/query74.q index b25db9c0e0..66499edfcb 100644 --- ql/src/test/queries/clientpositive/perf/query74.q +++ ql/src/test/queries/clientpositive/perf/query74.q @@ -6,14 +6,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -23,20 +23,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -49,15 +49,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100; -- end query 1 in stream 0 using template query74.tpl diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out index 65a61d40d1..a3d4008d7e 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out @@ -3,7 +3,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -18,17 +18,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -49,7 +48,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -61,15 +64,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -82,7 +88,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -97,17 +103,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -128,7 +133,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -140,15 +149,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -157,10 +169,10 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) - HiveProject(c_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $5), CASE(IS NOT NULL($6), CASE($9, >(/($4, $8), /($2, $6)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], customer_birth_country=[$3]) + HiveJoin(condition=[AND(=($0, $7), CASE(IS NOT NULL($8), CASE($11, >(/($6, $10), /($4, $8)), >(0:DECIMAL(1, 0), /($4, $8))), CASE($11, >(/($6, $10), 0:DECIMAL(1, 0)), false)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -171,7 +183,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -186,9 +198,9 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f9=[$7]) + HiveProject($f0=[$0], $f8=[$7]) HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -200,7 +212,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) HiveFilter(condition=[>($7, 0)]) @@ -214,6 +226,6 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out index a35db692b3..546f4bd8af 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out @@ -72,7 +72,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -90,12 +94,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -103,7 +107,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@catalog_sales @@ -186,7 +193,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -204,12 +215,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -217,7 +228,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@catalog_sales @@ -227,10 +241,10 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) - HiveProject(customer_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $7), CASE(IS NOT NULL($8), CASE($14, >(/($4, $13), /($2, $8)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f3=[$3], $f8=[$7]) +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], customer_birth_country=[$3]) + HiveJoin(condition=[AND(=($0, $9), CASE(IS NOT NULL($10), CASE($16, >(/($6, $15), /($4, $10)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -241,7 +255,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[AND(=($4, $0), CASE($8, CASE($11, >(/($1, $10), /($3, $7)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) @@ -255,7 +269,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -271,7 +285,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f8=[$7]) HiveFilter(condition=[>($7, 0)]) @@ -285,7 +299,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) HiveFilter(condition=[>($7, 0)]) @@ -299,7 +313,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) HiveFilter(condition=[>($7, 0)]) @@ -313,6 +327,6 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out index 047e21d943..d14c39a92a 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out @@ -4,14 +4,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -21,20 +21,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -47,15 +47,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -69,14 +69,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -86,20 +86,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -112,15 +112,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -129,11 +129,11 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) +HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) HiveJoin(condition=[AND(=($0, $6), CASE(IS NOT NULL($7), CASE($10, >(/($5, $9), /($3, $7)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) @@ -143,12 +143,12 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f4=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) @@ -158,11 +158,11 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f4=[$3]) HiveFilter(condition=[>($3, 0)]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) @@ -172,11 +172,11 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) HiveFilter(condition=[>($3, 0)]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) @@ -186,6 +186,6 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out index 127003c78b..0136ee4bb5 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -3,7 +3,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -18,17 +18,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -49,7 +48,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -61,15 +64,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -82,7 +88,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -97,17 +103,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -128,7 +133,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -140,15 +149,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -157,51 +169,26 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) - HiveProject(c_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $8), CASE(IS NOT NULL($9), CASE($7, >(/($4, $6), /($2, $9)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f9=[$7]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], customer_birth_country=[$3]) + HiveJoin(condition=[AND(CASE(IS NOT NULL($6), CASE($11, >(/($8, $10), /($4, $6)), >(0:DECIMAL(1, 0), /($4, $6))), CASE($11, >(/($8, $10), 0:DECIMAL(1, 0)), false)), =($0, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3], $f4=[$4]) + HiveAggregate(group=[{1, 2, 3, 5}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) @@ -210,6 +197,31 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) + HiveFilter(condition=[=($6, 1999)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1999)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out index f0ec403984..987a0f348e 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out @@ -72,7 +72,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -90,12 +94,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -103,7 +107,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@catalog_sales @@ -186,7 +193,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -204,12 +215,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -217,7 +228,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@catalog_sales @@ -227,24 +241,12 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) - HiveProject(customer_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $10), CASE(IS NOT NULL($11), CASE($14, >(/($6, $13), /($2, $11)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f3=[$3], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($7, $0), CASE($6, CASE($11, >(/($3, $10), /($1, $5)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(customer_id=[$2], customer_first_name=[$3], customer_last_name=[$4], customer_birth_country=[$5]) + HiveJoin(condition=[AND(=($2, $14), CASE($16, CASE($13, >(/($10, $12), /($1, $15)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) @@ -253,39 +255,26 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) + HiveFilter(condition=[=($6, 2000)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f8=[$7]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[AND(CASE(IS NOT NULL($6), CASE($11, >(/($8, $10), /($4, $6)), false), false), =($0, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3], $f4=[$4]) + HiveAggregate(group=[{1, 2, 3, 5}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) @@ -294,19 +283,44 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) + HiveFilter(condition=[=($6, 1999)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1999)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1999)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out index ee232fa4e3..289e5d2569 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out @@ -4,14 +4,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -21,20 +21,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -47,15 +47,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -69,14 +69,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -86,20 +86,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -112,15 +112,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -129,51 +129,26 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) +HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) - HiveJoin(condition=[AND(=($0, $9), CASE(IS NOT NULL($10), CASE($8, >(/($5, $7), /($3, $10)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f4=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) - HiveFilter(condition=[>($3, 0)]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f4=[$3]) - HiveFilter(condition=[>($3, 0)]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[AND(CASE(IS NOT NULL($5), CASE($10, >(/($7, $9), /($3, $5)), false), false), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) @@ -182,6 +157,31 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) + HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_id=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out index ecc69e3155..7f9df5e8af 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out @@ -3,7 +3,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -18,17 +18,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -49,7 +48,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -61,15 +64,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -82,7 +88,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -97,17 +103,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -128,7 +133,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -140,15 +149,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -159,29 +171,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 13 <- Reducer 23 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 24 <- Reducer 14 (BROADCAST_EDGE) +Map 26 <- Reducer 18 (BROADCAST_EDGE) +Map 27 <- Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 24 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 27 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 @@ -189,241 +201,238 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_350] - Limit [LIM_349] (rows=100 width=85) + File Output Operator [FS_346] + Limit [LIM_345] (rows=100 width=372) Number of rows:100 - Select Operator [SEL_348] (rows=12248094 width=85) - Output:["_col0"] + Select Operator [SEL_344] (rows=13333333 width=372) + Output:["_col0","_col1","_col2","_col3"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=12248094 width=85) - Output:["_col0"] - Top N Key Operator [TNK_154] (rows=12248094 width=537) - keys:_col8,top n:100 - Filter Operator [FIL_87] (rows=12248094 width=537) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col9 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_284] (rows=24496188 width=537) - Conds:RS_84._col3=RS_347._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col8","_col9"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + Select Operator [SEL_88] (rows=13333333 width=372) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_152] (rows=13333333 width=824) + keys:_col0, _col1, _col2, _col3,top n:100 + Filter Operator [FIL_87] (rows=13333333 width=824) + predicate:CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col4 / _col6))) ELSE ((0 > (_col4 / _col6))) END) ELSE (CASE WHEN (_col11) THEN (((_col8 / _col10) > 0)) ELSE (false) END) END + Merge Join Operator [MERGEJOIN_282] (rows=26666666 width=824) + Conds:RS_84._col0=RS_343._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col11"] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] PartitionCols:_col0 - Select Operator [SEL_346] (rows=80000000 width=297) + Select Operator [SEL_342] (rows=14325562 width=216) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_345] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_74] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=764) - Conds:RS_70._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] - PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - TableScan [TS_65] (rows=80000000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=115) - Conds:RS_344._col0=RS_291._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_291] - PartitionCols:_col0 - Select Operator [SEL_288] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_285] (rows=652 width=8) - predicate:(d_year = 2002) - TableScan [TS_62] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - PartitionCols:_col0 - Select Operator [SEL_343] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_59] (rows=575995635 width=221) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - Group By Operator [GBY_299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] + Filter Operator [FIL_341] (rows=14325562 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_340] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col0 + Group By Operator [GBY_73] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=212) + Conds:RS_69._col1=RS_324._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] + PartitionCols:_col0 + Select Operator [SEL_321] (rows=80000000 width=104) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) + Conds:RS_339._col0=RS_297._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_297] + PartitionCols:_col0 + Select Operator [SEL_290] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_286] (rows=652 width=8) + predicate:(d_year = 1999) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] + PartitionCols:_col0 + Select Operator [SEL_338] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_337] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_58] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_302] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_298] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_290] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_84] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_283] (rows=20485012 width=440) - Conds:RS_81._col3=RS_339._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_281] (rows=26666666 width=708) + Conds:RS_81._col0=RS_334._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] PartitionCols:_col0 - Select Operator [SEL_338] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_337] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_55] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=764) - Conds:RS_51._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_336._col0=RS_293._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_288] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] - PartitionCols:_col0 - Select Operator [SEL_335] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=231) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_294] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] + Group By Operator [GBY_333] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_54] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=212) + Conds:RS_50._col1=RS_323._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115) + Conds:RS_332._col0=RS_295._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_289] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_285] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_3] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_330] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_39] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_305] + Group By Operator [GBY_301] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_296] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_289] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_81] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_282] (rows=17130654 width=328) - Conds:RS_321._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_280] (rows=26666666 width=596) + Conds:RS_315._col0=RS_327._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_330] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_328] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_327] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_35] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_277] (rows=187573258 width=764) - Conds:RS_31._col1=RS_316._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + Filter Operator [FIL_326] (rows=22300081 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_325] (rows=66900244 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=80000000 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=212) + Conds:RS_30._col1=RS_322._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=115) + Conds:RS_320._col0=RS_293._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_293] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_284] (rows=652 width=8) + predicate:(d_year = 1999) + Please refer to the previous TableScan [TS_3] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=187573258 width=115) - Conds:RS_326._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] - PartitionCols:_col0 - Select Operator [SEL_290] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_287] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_62] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] - PartitionCols:_col0 - Select Operator [SEL_325] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_324] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=221) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_323] - Group By Operator [GBY_322] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] + Select Operator [SEL_319] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_318] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_294] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_288] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_320] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_319] (rows=17130654 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_318] (rows=51391963 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_317] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_15] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_275] (rows=51391963 width=764) - Conds:RS_11._col1=RS_315._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_274] (rows=51391963 width=115) - Conds:RS_311._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_286] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_62] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - PartitionCols:_col0 - Select Operator [SEL_310] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=231) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Group By Operator [GBY_314] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_15] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col9 + Merge Join Operator [MERGEJOIN_273] (rows=187573258 width=484) + Conds:RS_11._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col9"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_313] + PartitionCols:_col0 + Select Operator [SEL_312] (rows=80000000 width=376) + Output:["_col0","_col1","_col2","_col3","_col5"] + TableScan [TS_6] (rows=80000000 width=376) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_birth_country"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_272] (rows=187573258 width=115) + Conds:RS_311._col0=RS_291._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_291] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_283] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_309] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_303] + Group By Operator [GBY_299] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_292] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out index 5f5322c38b..585f4d6b9c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out @@ -72,7 +72,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -90,12 +94,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -103,7 +107,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@catalog_sales @@ -186,7 +193,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -204,12 +215,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -217,7 +228,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@catalog_sales @@ -229,397 +243,390 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 35 (BROADCAST_EDGE) -Map 11 <- Reducer 36 (BROADCAST_EDGE) -Map 15 <- Reducer 37 (BROADCAST_EDGE) -Map 19 <- Reducer 34 (BROADCAST_EDGE) -Map 23 <- Reducer 33 (BROADCAST_EDGE) -Map 27 <- Reducer 32 (BROADCAST_EDGE) +Map 1 <- Reducer 30 (BROADCAST_EDGE) +Map 12 <- Reducer 31 (BROADCAST_EDGE) +Map 16 <- Reducer 32 (BROADCAST_EDGE) +Map 20 <- Reducer 33 (BROADCAST_EDGE) +Map 24 <- Reducer 29 (BROADCAST_EDGE) +Map 39 <- Reducer 37 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 13 <- Map 38 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 21 <- Map 38 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 25 <- Map 38 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 29 <- Map 38 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 38 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 31 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 14 <- Map 38 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 18 <- Map 38 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 22 <- Map 38 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 26 <- Map 38 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 28 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 35 <- Map 38 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Map 28 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 26 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 30 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 27 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 36 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_563] - Limit [LIM_562] (rows=100 width=85) + File Output Operator [FS_556] + Limit [LIM_555] (rows=100 width=372) Number of rows:100 - Select Operator [SEL_561] (rows=7323197 width=85) - Output:["_col0"] + Select Operator [SEL_554] (rows=6666666 width=372) + Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_135] - Select Operator [SEL_134] (rows=7323197 width=85) - Output:["_col0"] - Top N Key Operator [TNK_250] (rows=7323197 width=537) - keys:_col13,top n:100 - Filter Operator [FIL_133] (rows=7323197 width=537) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col7) THEN (((_col9 / _col6) > (_col14 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_467] (rows=14646395 width=537) - Conds:RS_130._col3=RS_560._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col13","_col14"] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_560] + Select Operator [SEL_134] (rows=6666666 width=372) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_247] (rows=6666666 width=828) + keys:_col0, _col1, _col2, _col3,top n:100 + Filter Operator [FIL_133] (rows=6666666 width=828) + predicate:CASE WHEN (_col16) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col13 / _col15))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_464] (rows=13333333 width=828) + Conds:RS_130._col0=RS_553._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col8","_col10","_col11","_col13","_col15","_col16"] + <-Reducer 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_553] PartitionCols:_col0 - Select Operator [SEL_559] (rows=80000000 width=297) + Select Operator [SEL_552] (rows=14325562 width=216) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_558] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_114] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_113] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_462] (rows=187573258 width=764) - Conds:RS_109._col1=RS_506._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_506] - PartitionCols:_col0 - Select Operator [SEL_505] (rows=80000000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - TableScan [TS_104] (rows=80000000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_109] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_461] (rows=187573258 width=115) - Conds:RS_557._col0=RS_476._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_476] - PartitionCols:_col0 - Select Operator [SEL_472] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_468] (rows=652 width=8) - predicate:(d_year = 2002) - TableScan [TS_101] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_557] - PartitionCols:_col0 - Select Operator [SEL_556] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_555] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_107_date_dim_d_date_sk_min) AND DynamicValue(RS_107_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_107_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_98] (rows=575995635 width=435) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_554] - Group By Operator [GBY_553] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_494] - Group By Operator [GBY_488] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_477] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] + Filter Operator [FIL_551] (rows=14325562 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_550] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col0 + Group By Operator [GBY_112] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_459] (rows=51391963 width=212) + Conds:RS_108._col1=RS_518._col0(Inner),Output:["_col2","_col5"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_518] + PartitionCols:_col0 + Select Operator [SEL_513] (rows=80000000 width=104) + Output:["_col0","_col1"] + TableScan [TS_84] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_108] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_458] (rows=51391963 width=115) + Conds:RS_549._col0=RS_485._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_485] + PartitionCols:_col0 + Select Operator [SEL_474] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_469] (rows=652 width=8) + predicate:(d_year = 1999) + TableScan [TS_81] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_549] + PartitionCols:_col0 + Select Operator [SEL_548] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_547] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_106_date_dim_d_date_sk_min) AND DynamicValue(RS_106_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_106_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_97] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_546] + Group By Operator [GBY_545] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_498] + Group By Operator [GBY_492] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_486] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_474] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_130] - PartitionCols:_col3 - Filter Operator [FIL_129] (rows=12248093 width=668) - predicate:CASE WHEN (_col2) THEN (CASE WHEN (_col7) THEN (((_col9 / _col6) > (_col11 / _col1))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_466] (rows=24496187 width=668) - Conds:RS_126._col3=RS_552._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col11"] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_552] - PartitionCols:_col0 - Select Operator [SEL_551] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_550] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_94] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_460] (rows=51391963 width=764) - Conds:RS_90._col1=RS_507._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_463] (rows=13333333 width=712) + Conds:RS_127._col0=RS_544._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col8","_col10","_col11","_col13"] + <-Reducer 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_544] + PartitionCols:_col0 + Group By Operator [GBY_543] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Group By Operator [GBY_93] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_457] (rows=51391963 width=212) + Conds:RS_89._col1=RS_514._col0(Inner),Output:["_col2","_col5"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_514] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_513] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_456] (rows=51391963 width=115) + Conds:RS_542._col0=RS_475._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_475] + PartitionCols:_col0 + Select Operator [SEL_470] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_465] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_81] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_542] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_90] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_459] (rows=51391963 width=115) - Conds:RS_549._col0=RS_478._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_478] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_472] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_549] - PartitionCols:_col0 - Select Operator [SEL_548] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_547] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_88_date_dim_d_date_sk_min) AND DynamicValue(RS_88_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_88_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_79] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_546] - Group By Operator [GBY_545] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_495] - Group By Operator [GBY_489] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_479] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_126] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_465] (rows=20485012 width=556) - Conds:RS_123._col3=RS_544._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_544] + Select Operator [SEL_541] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_540] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_78] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_539] + Group By Operator [GBY_538] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_493] + Group By Operator [GBY_487] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_476] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_470] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_127] + PartitionCols:_col0 + Filter Operator [FIL_126] (rows=13333333 width=824) + predicate:CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col4 / _col6))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_462] (rows=26666666 width=824) + Conds:RS_123._col0=RS_537._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col11"] + <-Reducer 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] PartitionCols:_col0 - Select Operator [SEL_543] (rows=80000000 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_542] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_75] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_458] (rows=101084444 width=764) - Conds:RS_71._col1=RS_508._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_457] (rows=101084444 width=115) - Conds:RS_541._col0=RS_480._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_480] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_472] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_541] - PartitionCols:_col0 - Select Operator [SEL_540] (rows=285117831 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_539] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_69_date_dim_d_date_sk_min) AND DynamicValue(RS_69_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_69_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_60] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_538] - Group By Operator [GBY_537] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_496] - Group By Operator [GBY_490] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_481] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] + Select Operator [SEL_536] (rows=22300081 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_535] (rows=22300081 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_534] (rows=66900244 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col0 + Group By Operator [GBY_73] (rows=80000000 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_455] (rows=101084444 width=212) + Conds:RS_69._col1=RS_517._col0(Inner),Output:["_col2","_col5"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_517] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_513] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_454] (rows=101084444 width=115) + Conds:RS_533._col0=RS_483._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_483] + PartitionCols:_col0 + Select Operator [SEL_473] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_468] (rows=652 width=8) + predicate:(d_year = 1999) + Please refer to the previous TableScan [TS_81] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_533] + PartitionCols:_col0 + Select Operator [SEL_532] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_531] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_58] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_530] + Group By Operator [GBY_529] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] + Group By Operator [GBY_491] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_484] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_473] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_123] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_464] (rows=17130654 width=444) - Conds:RS_120._col3=RS_536._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_536] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_461] (rows=26666666 width=708) + Conds:RS_120._col0=RS_528._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8"] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_528] PartitionCols:_col0 - Select Operator [SEL_535] (rows=26666666 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_534] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_533] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_532] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + Group By Operator [GBY_527] (rows=66900244 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_54] (rows=80000000 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_453] (rows=101084444 width=212) + Conds:RS_50._col1=RS_516._col0(Inner),Output:["_col2","_col5"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_516] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_513] <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_55] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_456] (rows=101084444 width=764) - Conds:RS_51._col1=RS_511._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_452] (rows=101084444 width=115) + Conds:RS_526._col0=RS_481._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_481] + PartitionCols:_col0 + Select Operator [SEL_472] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_467] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_81] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_526] + PartitionCols:_col0 + Select Operator [SEL_525] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_524] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_39] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_523] + Group By Operator [GBY_522] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + Group By Operator [GBY_490] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_482] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_472] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_460] (rows=26666666 width=596) + Conds:RS_507._col0=RS_521._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + <-Reducer 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_521] + PartitionCols:_col0 + Filter Operator [FIL_520] (rows=22300081 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_519] (rows=66900244 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=80000000 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_451] (rows=187573258 width=212) + Conds:RS_30._col1=RS_515._col0(Inner),Output:["_col2","_col5"] <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_511] + SHUFFLE [RS_515] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_51] + Please refer to the previous Select Operator [SEL_513] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_455] (rows=101084444 width=115) - Conds:RS_531._col0=RS_486._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_486] + Merge Join Operator [MERGEJOIN_450] (rows=187573258 width=115) + Conds:RS_512._col0=RS_479._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_479] PartitionCols:_col0 - Select Operator [SEL_475] (rows=652 width=4) + Select Operator [SEL_471] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_471] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_531] + Filter Operator [FIL_466] (rows=652 width=8) + predicate:(d_year = 1999) + Please refer to the previous TableScan [TS_81] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_512] PartitionCols:_col0 - Select Operator [SEL_530] (rows=285117831 width=119) + Select Operator [SEL_511] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_529] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_528] - Group By Operator [GBY_527] (rows=1 width=12) + Filter Operator [FIL_510] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_509] + Group By Operator [GBY_508] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_499] - Group By Operator [GBY_493] (rows=1 width=12) + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_495] + Group By Operator [GBY_489] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_487] (rows=652 width=4) + Select Operator [SEL_480] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_120] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_463] (rows=17130654 width=328) - Conds:RS_516._col0=RS_526._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] - PartitionCols:_col0 - Select Operator [SEL_525] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_524] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_523] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_522] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_35] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_454] (rows=187573258 width=764) - Conds:RS_31._col1=RS_510._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_453] (rows=187573258 width=115) - Conds:RS_521._col0=RS_484._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_484] - PartitionCols:_col0 - Select Operator [SEL_474] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_470] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_521] - PartitionCols:_col0 - Select Operator [SEL_520] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_519] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=435) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_518] - Group By Operator [GBY_517] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_498] - Group By Operator [GBY_492] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_485] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_474] + Please refer to the previous Select Operator [SEL_471] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_516] + SHUFFLE [RS_507] PartitionCols:_col0 - Select Operator [SEL_515] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_514] (rows=17130654 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_513] (rows=51391963 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_512] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_15] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_452] (rows=51391963 width=764) - Conds:RS_11._col1=RS_509._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_509] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_451] (rows=51391963 width=115) - Conds:RS_504._col0=RS_482._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_482] - PartitionCols:_col0 - Select Operator [SEL_473] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_469] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_504] - PartitionCols:_col0 - Select Operator [SEL_503] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_502] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_501] - Group By Operator [GBY_500] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_497] - Group By Operator [GBY_491] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_483] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_473] + Group By Operator [GBY_506] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_15] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col9 + Merge Join Operator [MERGEJOIN_449] (rows=187573258 width=484) + Conds:RS_11._col1=RS_505._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col9"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_505] + PartitionCols:_col0 + Select Operator [SEL_504] (rows=80000000 width=376) + Output:["_col0","_col1","_col2","_col3","_col5"] + TableScan [TS_6] (rows=80000000 width=376) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_birth_country"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_448] (rows=187573258 width=115) + Conds:RS_503._col0=RS_477._col0(Inner),Output:["_col1","_col2"] + <-Map 28 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_477] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_470] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_503] + PartitionCols:_col0 + Select Operator [SEL_502] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_501] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_500] + Group By Operator [GBY_499] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_494] + Group By Operator [GBY_488] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_478] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_470] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out index 31d1fee130..39c76fc82c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out @@ -4,14 +4,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -21,20 +21,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -47,15 +47,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -69,14 +69,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -86,20 +86,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -112,15 +112,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -131,29 +131,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 13 <- Reducer 23 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 24 <- Reducer 14 (BROADCAST_EDGE) +Map 26 <- Reducer 18 (BROADCAST_EDGE) +Map 27 <- Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 24 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 27 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 @@ -161,239 +161,238 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_349] - Limit [LIM_348] (rows=100 width=280) + File Output Operator [FS_346] + Limit [LIM_345] (rows=100 width=280) Number of rows:100 - Select Operator [SEL_347] (rows=12248094 width=280) + Select Operator [SEL_344] (rows=13333333 width=280) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=12248094 width=280) + Select Operator [SEL_88] (rows=13333333 width=280) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_154] (rows=12248094 width=732) - keys:_col8, _col7, _col9,top n:100 - Filter Operator [FIL_87] (rows=12248094 width=732) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col10 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_284] (rows=24496188 width=732) - Conds:RS_84._col3=RS_346._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + Top N Key Operator [TNK_152] (rows=13333333 width=732) + keys:_col2, _col0, _col1,top n:100 + Filter Operator [FIL_87] (rows=13333333 width=732) + predicate:CASE WHEN (_col5 is not null) THEN (CASE WHEN (_col10) THEN (((_col7 / _col9) > (_col3 / _col5))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_282] (rows=26666666 width=732) + Conds:RS_84._col0=RS_343._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7","_col9","_col10"] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] PartitionCols:_col0 - Group By Operator [GBY_345] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_74] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=377) - Conds:RS_70._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] - PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=284) - Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_65] (rows=80000000 width=284) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=101) - Conds:RS_344._col0=RS_291._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_291] - PartitionCols:_col0 - Select Operator [SEL_288] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_285] (rows=652 width=8) - predicate:((d_year = 2002) and (d_year) IN (2001, 2002)) - TableScan [TS_62] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + Select Operator [SEL_342] (rows=14325562 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_341] (rows=14325562 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_340] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col0 + Group By Operator [GBY_73] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=211) + Conds:RS_69._col1=RS_324._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0 - Select Operator [SEL_343] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_59] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - Group By Operator [GBY_299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] + Select Operator [SEL_321] (rows=80000000 width=104) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) + Conds:RS_339._col0=RS_297._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_297] + PartitionCols:_col0 + Select Operator [SEL_290] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_286] (rows=652 width=8) + predicate:((d_year = 1998) and (d_year) IN (1998, 1999)) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] + PartitionCols:_col0 + Select Operator [SEL_338] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_337] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_58] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_302] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_298] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_290] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_84] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_283] (rows=20485012 width=440) - Conds:RS_81._col3=RS_339._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_281] (rows=26666666 width=616) + Conds:RS_81._col0=RS_334._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] PartitionCols:_col0 - Select Operator [SEL_338] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_337] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_55] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=391) - Conds:RS_51._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_336._col0=RS_293._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_288] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] - PartitionCols:_col0 - Select Operator [SEL_335] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_294] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] + Group By Operator [GBY_333] (rows=42976686 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_54] (rows=51391963 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=211) + Conds:RS_50._col1=RS_323._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115) + Conds:RS_332._col0=RS_295._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + PartitionCols:_col0 + Select Operator [SEL_289] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_285] (rows=652 width=8) + predicate:((d_year = 1999) and (d_year) IN (1998, 1999)) + Please refer to the previous TableScan [TS_3] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_330] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_39] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_305] + Group By Operator [GBY_301] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_296] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_289] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_81] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_282] (rows=17130654 width=328) - Conds:RS_321._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_280] (rows=26666666 width=504) + Conds:RS_315._col0=RS_327._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_330] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=26666666 width=212) - predicate:(_col3 > 0) - Select Operator [SEL_328] (rows=80000000 width=212) - Output:["_col0","_col3"] - Group By Operator [GBY_327] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_35] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_277] (rows=187573258 width=377) - Conds:RS_31._col1=RS_316._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + Filter Operator [FIL_326] (rows=22300081 width=212) + predicate:(_col1 > 0) + Group By Operator [GBY_325] (rows=66900244 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_34] (rows=80000000 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=197) + Conds:RS_30._col1=RS_322._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=101) + Conds:RS_320._col0=RS_293._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_293] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_284] (rows=652 width=8) + predicate:((d_year = 1998) and (d_year) IN (1998, 1999)) + Please refer to the previous TableScan [TS_3] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=187573258 width=101) - Conds:RS_326._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] - PartitionCols:_col0 - Select Operator [SEL_290] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_287] (rows=652 width=8) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) - Please refer to the previous TableScan [TS_62] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] - PartitionCols:_col0 - Select Operator [SEL_325] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_324] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_323] - Group By Operator [GBY_322] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_302] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] + Select Operator [SEL_319] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_318] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + Group By Operator [GBY_300] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_294] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_288] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_320] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_319] (rows=17130654 width=212) - predicate:(_col3 > 0) - Select Operator [SEL_318] (rows=51391963 width=212) - Output:["_col0","_col3"] - Group By Operator [GBY_317] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_275] (rows=51391963 width=391) - Conds:RS_11._col1=RS_315._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_274] (rows=51391963 width=115) - Conds:RS_311._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_286] (rows=652 width=8) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) - Please refer to the previous TableScan [TS_62] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - PartitionCols:_col0 - Select Operator [SEL_310] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Group By Operator [GBY_314] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_273] (rows=187573258 width=377) + Conds:RS_11._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_313] + PartitionCols:_col0 + Select Operator [SEL_312] (rows=80000000 width=284) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_6] (rows=80000000 width=284) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_272] (rows=187573258 width=101) + Conds:RS_311._col0=RS_291._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_291] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_283] (rows=652 width=8) + predicate:((d_year = 1999) and (d_year) IN (1998, 1999)) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_309] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_303] + Group By Operator [GBY_299] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_292] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_287] diff --git ql/src/test/results/clientpositive/perf/tez/query11.q.out ql/src/test/results/clientpositive/perf/tez/query11.q.out index 02ab587dc1..935f3416f7 100644 --- ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -3,7 +3,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -18,17 +18,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -49,7 +48,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -61,15 +64,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -82,7 +88,7 @@ with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -97,17 +103,16 @@ with year_total as ( group by c_customer_id ,c_first_name ,c_last_name - ,d_year ,c_preferred_cust_flag ,c_birth_country ,c_login ,c_email_address - ,d_year + ,d_year union all select c_customer_id customer_id ,c_first_name customer_first_name ,c_last_name customer_last_name - ,c_preferred_cust_flag + ,c_preferred_cust_flag customer_preferred_cust_flag ,c_birth_country customer_birth_country ,c_login customer_login ,c_email_address customer_email_address @@ -128,7 +133,11 @@ with year_total as ( ,c_email_address ,d_year ) - select t_s_secyear.c_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_w_firstyear @@ -140,15 +149,18 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 - and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by t_s_secyear.c_preferred_cust_flag + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -190,25 +202,25 @@ Stage-0 Stage-1 Reducer 8 vectorized File Output Operator [FS_355] - Limit [LIM_354] (rows=100 width=85) + Limit [LIM_354] (rows=100 width=372) Number of rows:100 - Select Operator [SEL_353] (rows=12248093 width=85) - Output:["_col0"] + Select Operator [SEL_353] (rows=12248093 width=372) + Output:["_col0","_col1","_col2","_col3"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_93] - Select Operator [SEL_92] (rows=12248093 width=85) - Output:["_col0"] - Top N Key Operator [TNK_158] (rows=12248093 width=537) - keys:_col8,top n:100 - Filter Operator [FIL_91] (rows=12248093 width=537) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_288] (rows=24496186 width=537) - Conds:RS_88._col2=RS_352._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] + Select Operator [SEL_92] (rows=12248093 width=372) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_158] (rows=12248093 width=824) + keys:_col7, _col8, _col9, _col10,top n:100 + Filter Operator [FIL_91] (rows=12248093 width=824) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col11 / _col3))) ELSE ((0 > (_col11 / _col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > 0)) ELSE (false) END) END + Merge Join Operator [MERGEJOIN_288] (rows=24496186 width=824) + Conds:RS_88._col2=RS_352._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_352] PartitionCols:_col0 - Select Operator [SEL_351] (rows=80000000 width=297) - Output:["_col0","_col1","_col2"] + Select Operator [SEL_351] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_350] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 19 [SIMPLE_EDGE] @@ -238,7 +250,7 @@ Stage-0 Select Operator [SEL_292] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_289] (rows=652 width=8) - predicate:((d_year = 2002) and d_date_sk is not null) + predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_65] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 17 [SIMPLE_EDGE] vectorized @@ -299,7 +311,7 @@ Stage-0 Select Operator [SEL_294] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_291] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + predicate:((d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_65] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_339] @@ -359,7 +371,7 @@ Stage-0 Select Operator [SEL_293] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_290] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + predicate:((d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_65] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_329] diff --git ql/src/test/results/clientpositive/perf/tez/query4.q.out ql/src/test/results/clientpositive/perf/tez/query4.q.out index 1b2f165c24..89a44c0c2d 100644 --- ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -72,7 +72,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -90,12 +94,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -103,7 +107,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@catalog_sales @@ -186,7 +193,11 @@ union all ,c_email_address ,d_year ) - select t_s_secyear.customer_preferred_cust_flag + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country from year_total t_s_firstyear ,year_total t_s_secyear ,year_total t_c_firstyear @@ -204,12 +215,12 @@ union all and t_s_secyear.sale_type = 's' and t_c_secyear.sale_type = 'c' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.dyear = 2001 - and t_s_secyear.dyear = 2001+1 - and t_c_firstyear.dyear = 2001 - and t_c_secyear.dyear = 2001+1 - and t_w_firstyear.dyear = 2001 - and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 and t_s_firstyear.year_total > 0 and t_c_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 @@ -217,7 +228,10 @@ union all > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - order by t_s_secyear.customer_preferred_cust_flag + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@catalog_sales @@ -272,25 +286,25 @@ Stage-0 Stage-1 Reducer 10 vectorized File Output Operator [FS_570] - Limit [LIM_569] (rows=100 width=85) + Limit [LIM_569] (rows=100 width=372) Number of rows:100 - Select Operator [SEL_568] (rows=7323197 width=85) - Output:["_col0"] + Select Operator [SEL_568] (rows=7323197 width=372) + Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_141] - Select Operator [SEL_140] (rows=7323197 width=85) - Output:["_col0"] - Top N Key Operator [TNK_256] (rows=7323197 width=537) - keys:_col13,top n:100 - Filter Operator [FIL_139] (rows=7323197 width=537) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_473] (rows=14646395 width=537) - Conds:RS_136._col2=RS_567._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"] + Select Operator [SEL_140] (rows=7323197 width=372) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_256] (rows=7323197 width=824) + keys:_col12, _col13, _col14, _col15,top n:100 + Filter Operator [FIL_139] (rows=7323197 width=824) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col16 / _col3))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_473] (rows=14646395 width=824) + Conds:RS_136._col2=RS_567._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 30 [SIMPLE_EDGE] vectorized SHUFFLE [RS_567] PartitionCols:_col0 - Select Operator [SEL_566] (rows=80000000 width=297) - Output:["_col0","_col1","_col2"] + Select Operator [SEL_566] (rows=80000000 width=484) + Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_565] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 29 [SIMPLE_EDGE] @@ -320,7 +334,7 @@ Stage-0 Select Operator [SEL_478] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_474] (rows=652 width=8) - predicate:((d_year = 2002) and d_date_sk is not null) + predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_106] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 27 [SIMPLE_EDGE] vectorized @@ -435,7 +449,7 @@ Stage-0 Select Operator [SEL_481] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_477] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + predicate:((d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_106] <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_546] @@ -495,7 +509,7 @@ Stage-0 Select Operator [SEL_480] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_476] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + predicate:((d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_106] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_536] @@ -555,7 +569,7 @@ Stage-0 Select Operator [SEL_479] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_475] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + predicate:((d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_106] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_526] diff --git ql/src/test/results/clientpositive/perf/tez/query74.q.out ql/src/test/results/clientpositive/perf/tez/query74.q.out index 4e92161c15..831ade0a61 100644 --- ql/src/test/results/clientpositive/perf/tez/query74.q.out +++ ql/src/test/results/clientpositive/perf/tez/query74.q.out @@ -4,14 +4,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -21,20 +21,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -47,15 +47,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@customer @@ -69,14 +69,14 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ss_net_paid) year_total + ,sum(ss_net_paid) year_total ,'s' sale_type from customer ,store_sales ,date_dim where c_customer_sk = ss_customer_sk and ss_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name @@ -86,20 +86,20 @@ with year_total as ( ,c_first_name customer_first_name ,c_last_name customer_last_name ,d_year as year - ,max(ws_net_paid) year_total + ,sum(ws_net_paid) year_total ,'w' sale_type from customer ,web_sales ,date_dim where c_customer_sk = ws_bill_customer_sk and ws_sold_date_sk = d_date_sk - and d_year in (2001,2001+1) + and d_year in (1998,1998+1) group by c_customer_id ,c_first_name ,c_last_name ,d_year ) - select + select t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name from year_total t_s_firstyear ,year_total t_s_secyear @@ -112,15 +112,15 @@ with year_total as ( and t_w_firstyear.sale_type = 'w' and t_s_secyear.sale_type = 's' and t_w_secyear.sale_type = 'w' - and t_s_firstyear.year = 2001 - and t_s_secyear.year = 2001+1 - and t_w_firstyear.year = 2001 - and t_w_secyear.year = 2001+1 + and t_s_firstyear.year = 1998 + and t_s_secyear.year = 1998+1 + and t_w_firstyear.year = 1998 + and t_w_secyear.year = 1998+1 and t_s_firstyear.year_total > 0 and t_w_firstyear.year_total > 0 and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - order by 2,1,3 + order by 3,1,2 limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@customer @@ -171,7 +171,7 @@ Stage-0 Select Operator [SEL_92] (rows=12248093 width=280) Output:["_col0","_col1","_col2"] Top N Key Operator [TNK_158] (rows=12248093 width=732) - keys:_col8, _col7, _col9,top n:100 + keys:_col9, _col7, _col8,top n:100 Filter Operator [FIL_91] (rows=12248093 width=732) predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col10 / _col3))) ELSE (false) END) ELSE (false) END Merge Join Operator [MERGEJOIN_288] (rows=24496186 width=732) @@ -180,12 +180,12 @@ Stage-0 SHUFFLE [RS_351] PartitionCols:_col0 Group By Operator [GBY_350] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_78] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7 Merge Join Operator [MERGEJOIN_285] (rows=187573258 width=377) Conds:RS_74._col1=RS_318._col0(Inner),Output:["_col2","_col5","_col6","_col7"] <-Map 26 [SIMPLE_EDGE] vectorized @@ -208,7 +208,7 @@ Stage-0 Select Operator [SEL_292] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_289] (rows=652 width=8) - predicate:((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) + predicate:((d_year = 1999) and (d_year) IN (1998, 1999) and d_date_sk is not null) TableScan [TS_65] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 17 [SIMPLE_EDGE] vectorized @@ -246,12 +246,12 @@ Stage-0 Select Operator [SEL_341] (rows=51391963 width=212) Output:["_col0","_col3"] Group By Operator [GBY_340] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_57] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7 Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=391) Conds:RS_53._col1=RS_321._col0(Inner),Output:["_col2","_col5","_col6","_col7"] <-Map 26 [SIMPLE_EDGE] vectorized @@ -269,7 +269,7 @@ Stage-0 Select Operator [SEL_294] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_291] (rows=652 width=8) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) + predicate:((d_year = 1998) and (d_year) IN (1998, 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_65] <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_339] @@ -306,12 +306,12 @@ Stage-0 Select Operator [SEL_331] (rows=80000000 width=212) Output:["_col0","_col3"] Group By Operator [GBY_330] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_36] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7 Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=377) Conds:RS_32._col1=RS_320._col0(Inner),Output:["_col2","_col5","_col6","_col7"] <-Map 26 [SIMPLE_EDGE] vectorized @@ -329,7 +329,7 @@ Stage-0 Select Operator [SEL_293] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_290] (rows=652 width=8) - predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) + predicate:((d_year = 1998) and (d_year) IN (1998, 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_65] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_329] @@ -357,12 +357,12 @@ Stage-0 Select Operator [SEL_323] (rows=51391963 width=212) Output:["_col0","_col1"] Group By Operator [GBY_322] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_16] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7 Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=391) Conds:RS_12._col1=RS_319._col0(Inner),Output:["_col2","_col5","_col6","_col7"] <-Map 26 [SIMPLE_EDGE] vectorized