diff --git a/data/files/tpcds-perf/metastore_export/csv/TABLE_PARAMS.txt b/data/files/tpcds-perf/metastore_export/csv/TABLE_PARAMS.txt new file mode 100644 index 0000000..9aa385a --- /dev/null +++ b/data/files/tpcds-perf/metastore_export/csv/TABLE_PARAMS.txt @@ -0,0 +1,102 @@ +_store_,COLUMN_STATS_ACCURATE,true +_store_,numFiles,1 +_store_,numRows,1704 +_store_,rawDataSize,3256276 +_store_,totalSize,101707 +_store_,transient_lastDdlTime,143456_customer_demographics_ +_call_center_,COLUMN_STATS_ACCURATE,true +_call_center_,numFiles,1 +_call_center_,numRows,60 +_call_center_,rawDataSize,122700 +_call_center_,totalSize,10347 +_call_center_,transient_lastDdlTime,1434561922 +_catalog_page_,COLUMN_STATS_ACCURATE,true +_catalog_page_,numFiles,1 +_catalog_page_,numRows,46000 +_catalog_page_,rawDataSize,21198808 +_catalog_page_,totalSize,1576662 +_catalog_page_,transient_lastDdlTime,1434561925 +_customer_,COLUMN_STATS_ACCURATE,true +_customer_,numFiles,538 +_customer_,numRows,80000000 +_customer_,rawDataSize,68801615852 +_customer_,totalSize,3143935054 +_customer_,transient_lastDdlTime,1434561966 +_customer_address_,COLUMN_STATS_ACCURATE,true +_customer_address_,numFiles,274 +_customer_address_,numRows,40000000 +_customer_address_,rawDataSize,40595195284 +_customer_address_,totalSize,530195843 +_customer_address_,transient_lastDdlTime,1434561994 +_customer_demographics_,COLUMN_STATS_ACCURATE,true +_customer_demographics_,numFiles,8 +_customer_demographics_,numRows,1920800 +_customer_demographics_,rawDataSize,717_income_band_59 +_customer_demographics_,totalSize,323062 +_customer_demographics_,transient_lastDdlTime,1434562071 +_date_dim_,COLUMN_STATS_ACCURATE,true +_date_dim_,numFiles,1 +_date_dim_,numRows,73049 +_date_dim_,rawDataSize,81741831 +_date_dim_,totalSize,362925 +_date_dim_,transient_lastDdlTime,1434562075 +_household_demographics_,COLUMN_STATS_ACCURATE,true +_household_demographics_,numFiles,1 +_household_demographics_,numRows,7200 +_household_demographics_,rawDataSize,770400 +_household_demographics_,totalSize,901 +_household_demographics_,transient_lastDdlTime,1434562078 +_income_band_,COLUMN_STATS_ACCURATE,true +_income_band_,numFiles,1 +_income_band_,numRows,20 +_income_band_,rawDataSize,240 +_income_band_,totalSize,399 +_income_band_,transient_lastDdlTime,1434562081 +_item_,COLUMN_STATS_ACCURATE,true +_item_,numFiles,9 +_item_,numRows,462000 +_item_,rawDataSize,663560457 +_item_,totalSize,29760748 +_item_,transient_lastDdlTime,1434562091 +_promotion_,COLUMN_STATS_ACCURATE,true +_promotion_,numFiles,1 +_promotion_,numRows,2300 +_promotion_,rawDataSize,2713420 +_promotion_,totalSize,63964 +_promotion_,transient_lastDdlTime,1434562093 +_reason_,COLUMN_STATS_ACCURATE,true +_reason_,numFiles,1 +_reason_,numRows,72 +_reason_,rawDataSize,14400 +_reason_,totalSize,1024 +_reason_,transient_lastDdlTime,1434562095 +_ship_mode_,COLUMN_STATS_ACCURATE,true +_ship_mode_,numFiles,0 +_ship_mode_,numRows,0 +_ship_mode_,rawDataSize,0 +_ship_mode_,totalSize,0 +_ship_mode_,transient_lastDdlTime,1434562097 +_time_dim_,COLUMN_STATS_ACCURATE,true +_time_dim_,numFiles,1 +_time_dim_,numRows,86400 +_time_dim_,rawDataSize,40694400 +_time_dim_,totalSize,133902 +_time_dim_,transient_lastDdlTime,1434562099 +_warehouse_,COLUMN_STATS_ACCURATE,true +_warehouse_,numFiles,1 +_warehouse_,numRows,27 +_warehouse_,rawDataSize,27802 +_warehouse_,totalSize,2971 +_warehouse_,transient_lastDdlTime,1434562102 +_web_page_,COLUMN_STATS_ACCURATE,true +_web_page_,numFiles,1 +_web_page_,numRows,4602 
+_web_page_,rawDataSize,2696178 +_web_page_,totalSize,50572 +_web_page_,transient_lastDdlTime,1434562104 +_web_site_,COLUMN_STATS_ACCURATE,true +_web_site_,numFiles,1 +_web_site_,numRows,84 +_web_site_,rawDataSize,155408 +_web_site_,totalSize,11271 +_web_site_,transient_lastDdlTime,1434562107 diff --git a/data/files/tpcds-perf/metastore_export/csv/TAB_COL_STATS.txt b/data/files/tpcds-perf/metastore_export/csv/TAB_COL_STATS.txt new file mode 100644 index 0000000..85b159b --- /dev/null +++ b/data/files/tpcds-perf/metastore_export/csv/TAB_COL_STATS.txt @@ -0,0 +1,259 @@ +6196,,s_store_sk,int,default,,,,,1434571564,1704,1,,1507,,0,,_store_,store +6197,16,s_store_id,string,default,,,,,1434571564,,,16,822,,0,,_store_,store +6198,9.94131455399061,s_rec_start_date,string,default,,,,,1434571564,,,10,5,,0,,_store_,store +6199,5,s_rec_end_date,string,default,,,,,1434571564,,,10,4,,0,,_store_,store +6200,,s_closed_date_sk,int,default,,,,,1434571564,2451314,2450820,,214,,1211,,_store_,store +6201,3.91138497652582,s_store_name,string,default,,,,,1434571564,,,5,11,,0,,_store_,store +6202,,s_number_employees,int,default,,,,,1434571564,300,200,,63,,13,,_store_,store +6203,,s_floor_space,int,default,,,,,1434571564,9997773,5000201,,1323,,12,,_store_,store +6204,7.07746478873239,s_hours,string,default,,,,,1434571564,,,8,4,,0,,_store_,store +6205,12.6584507042254,s_manager,string,default,,,,,1434571564,,,21,1213,,0,,_store_,store +6206,,s_market_id,int,default,,,,,1434571564,10,1,,11,,15,,_store_,store +6207,6.94659624413146,s_geography_class,string,default,,,,,1434571564,,,7,2,,0,,_store_,store +6208,57.7194835680751,s_market_desc,string,default,,,,,1434571564,,,100,1382,,0,,_store_,store +6209,12.781103286385,s_market_manager,string,default,,,,,1434571564,,,20,896,,0,,_store_,store +6210,,s_division_id,int,default,,,,,1434571564,1,1,,1,,10,,_store_,store +6211,6.95070422535211,s_division_name,string,default,,,,,1434571564,,,7,2,,0,,_store_,store +6212,,s_company_id,int,default,,,,,1434571564,1,1,,1,,13,,_store_,store +6213,6.95070422535211,s_company_name,string,default,,,,,1434571564,,,7,2,,0,,_store_,store +6214,2.87793427230047,s_street_number,string,default,,,,,1434571564,,,4,633,,0,,_store_,store +6215,8.58509389671362,s_street_name,string,default,,,,,1434571564,,,19,1020,,0,,_store_,store +6216,4.19307511737089,s_street_type,string,default,,,,,1434571564,,,9,23,,0,,_store_,store +6217,7.83978873239437,s_suite_number,string,default,,,,,1434571564,,,9,86,,0,,_store_,store +6218,8.53462441314554,s_city,string,default,,,,,1434571564,,,15,244,,0,,_store_,store +6219,13.9237089201878,s_county,string,default,,,,,1434571564,,,22,122,,0,,_store_,store +6220,1.98591549295775,s_state,string,default,,,,,1434571564,,,2,49,,0,,_store_,store +6221,4.96478873239437,s_zip,string,default,,,,,1434571564,,,5,936,,0,,_store_,store +6222,12.931338028169,s_country,string,default,,,,,1434571564,,,13,2,,0,,_store_,store +6223,,s_gmt_offset,float,default,,,-5,-9,1434571564,,,,5,,9,,_store_,store +6224,,s_tax_precentage,float,default,,,0.109999999403954,0,1434571564,,,,15,,12,,_store_,store +6225,,cc_call_center_sk,int,default,,,,,1434571573,60,1,,37,,0,,_call_center_,call_center +6226,16,cc_call_center_id,string,default,,,,,1434571573,,,16,30,,0,,_call_center_,call_center +6227,10,cc_rec_start_date,string,default,,,,,1434571573,,,10,4,,0,,_call_center_,call_center +6228,5,cc_rec_end_date,string,default,,,,,1434571573,,,10,4,,0,,_call_center_,call_center 
+6229,,cc_closed_date_sk,int,default,,,,,1434571573,,,,1,,60,,_call_center_,call_center +6230,,cc_open_date_sk,int,default,,,,,1434571573,2451146,2450794,,21,,0,,_call_center_,call_center +6231,13.85,cc_name,string,default,,,,,1434571573,,,19,29,,0,,_call_center_,call_center +6232,5.43333333333333,cc_class,string,default,,,,,1434571573,,,6,3,,0,,_call_center_,call_center +6233,,cc_employees,int,default,,,,,1434571573,1963174023,5412266,,39,,0,,_call_center_,call_center +6234,,cc_sq_ft,int,default,,,,,1434571573,2044891959,-2108783316,,56,,0,,_call_center_,call_center +6235,7.1,cc_hours,string,default,,,,,1434571573,,,8,3,,0,,_call_center_,call_center +6236,12.2333333333333,cc_manager,string,default,,,,,1434571573,,,17,47,,0,,_call_center_,call_center +6237,,cc_mkt_id,int,default,,,,,1434571573,6,1,,5,,0,,_call_center_,call_center +6238,34.7,cc_mkt_class,string,default,,,,,1434571573,,,50,43,,0,,_call_center_,call_center +6239,61.8666666666667,cc_mkt_desc,string,default,,,,,1434571573,,,100,41,,0,,_call_center_,call_center +6240,12.3833333333333,cc_market_manager,string,default,,,,,1434571573,,,17,45,,0,,_call_center_,call_center +6241,,cc_division,int,default,,,,,1434571573,6,1,,5,,0,,_call_center_,call_center +6242,3.96666666666667,cc_division_name,string,default,,,,,1434571573,,,5,7,,0,,_call_center_,call_center +6243,,cc_company,int,default,,,,,1434571573,6,1,,5,,0,,_call_center_,call_center +6244,3.8,cc_company_name,string,default,,,,,1434571573,,,5,7,,0,,_call_center_,call_center +6245,2.9,cc_street_number,string,default,,,,,1434571573,,,3,31,,0,,_call_center_,call_center +6246,8.16666666666667,cc_street_name,string,default,,,,,1434571573,,,16,28,,0,,_call_center_,call_center +6247,4.28333333333333,cc_street_type,string,default,,,,,1434571573,,,9,14,,0,,_call_center_,call_center +6248,7.63333333333333,cc_suite_number,string,default,,,,,1434571573,,,9,26,,0,,_call_center_,call_center +6249,9.01666666666667,cc_city,string,default,,,,,1434571573,,,15,33,,0,,_call_center_,call_center +6250,14.1166666666667,cc_county,string,default,,,,,1434571573,,,21,31,,0,,_call_center_,call_center +6251,2,cc_state,string,default,,,,,1434571573,,,2,18,,0,,_call_center_,call_center +6252,5,cc_zip,string,default,,,,,1434571573,,,5,20,,0,,_call_center_,call_center +6253,13,cc_country,string,default,,,,,1434571573,,,13,1,,0,,_call_center_,call_center +6254,,cc_gmt_offset,float,default,,,-5,-8,1434571573,,,,3,,0,,_call_center_,call_center +6255,,cc_tax_percentage,float,default,,,0.119999997317791,0,1434571573,,,,17,,0,,_call_center_,call_center +6256,,cp_catalog_page_sk,int,default,,,,,1434571586,46000,1,,62562,,0,,_catalog_page_,catalog_page +6257,16,cp_catalog_page_id,string,default,,,,,1434571586,,,16,38846,,0,,_catalog_page_,catalog_page +6258,,cp_start_date_sk,int,default,,,,,1434571586,2453005,2450815,,112,,444,,_catalog_page_,catalog_page +6259,,cp_end_date_sk,int,default,,,,,1434571586,2453186,2450844,,86,,446,,_catalog_page_,catalog_page +6260,9.90826086956522,cp_department,string,default,,,,,1434571586,,,10,2,,0,,_catalog_page_,catalog_page +6261,,cp_catalog_number,int,default,,,,,1434571586,109,1,,66,,465,,_catalog_page_,catalog_page +6262,,cp_catalog_page_number,int,default,,,,,1434571586,425,1,,224,,443,,_catalog_page_,catalog_page +6263,73.8601956521739,cp_description,string,default,,,,,1434571586,,,99,48242,,0,,_catalog_page_,catalog_page +6264,7.59682608695652,cp_type,string,default,,,,,1434571586,,,9,5,,0,,_catalog_page_,catalog_page 
+6265,,c_customer_sk,int,default,,,,,1434571680,80000000,1,,72955234,,0,,_customer_,customer +6266,16,c_customer_id,string,default,,,,,1434571680,,,16,66900244,,0,,_customer_,customer +6267,,c_current_cdemo_sk,int,default,,,,,1434571680,1920800,1,,1835839,,2798616,,_customer_,customer +6268,,c_current_hdemo_sk,int,default,,,,,1434571680,7200,1,,9299,,2799006,,_customer_,customer +6269,,c_current_addr_sk,int,default,,,,,1434571680,40000000,1,,45300013,,0,,_customer_,customer +6270,,c_first_shipto_date_sk,int,default,,,,,1434571680,2452678,2449028,,3585,,2800717,,_customer_,customer +6271,,c_first_sales_date_sk,int,default,,,,,1434571680,2452648,2448998,,3585,,2799514,,_customer_,customer +6272,3.1285808875,c_salutation,string,default,,,,,1434571680,,,4,9,,0,,_customer_,customer +6273,5.6321494125,c_first_name,string,default,,,,,1434571680,,,11,5529,,0,,_customer_,customer +6274,5.9160140875,c_last_name,string,default,,,,,1434571680,,,13,7820,,0,,_customer_,customer +6275,0.96499295,c_preferred_cust_flag,string,default,,,,,1434571680,,,1,3,,0,,_customer_,customer +6276,,c_birth_day,int,default,,,,,1434571680,31,1,,23,,2799325,,_customer_,customer +6277,,c_birth_month,int,default,,,,,1434571680,12,1,,13,,2800192,,_customer_,customer +6278,,c_birth_year,int,default,,,,,1434571680,1992,1924,,49,,2798667,,_customer_,customer +6279,8.3879500875,c_birth_country,string,default,,,,,1434571680,,,20,214,,0,,_customer_,customer +6280,0,c_login,string,default,,,,,1434571680,,,0,2,,0,,_customer_,customer +6281,26.5065306,c_email_address,string,default,,,,,1434571680,,,48,112512351,,0,,_customer_,customer +6282,6.7549565125,c_last_review_date,string,default,,,,,1434571680,,,7,224,,0,,_customer_,customer +6283,,ca_address_sk,int,default,,,,,1434571715,40000000,1,,53871098,,0,,_customer_address_,customer_address +6284,16,ca_address_id,string,default,,,,,1434571715,,,16,47305616,,0,,_customer_address_,customer_address +6285,2.806232675,ca_street_number,string,default,,,,,1434571715,,,4,822,,0,,_customer_address_,customer_address +6286,8.44949205,ca_street_name,string,default,,,,,1434571715,,,21,7488,,0,,_customer_address_,customer_address +6287,4.073537425,ca_street_type,string,default,,,,,1434571715,,,9,23,,0,,_customer_address_,customer_address +6288,7.653062625,ca_suite_number,string,default,,,,,1434571715,,,9,86,,0,,_customer_address_,customer_address +6289,8.684372925,ca_city,string,default,,,,,1434571715,,,20,896,,0,,_customer_address_,customer_address +6290,13.543959525,ca_county,string,default,,,,,1434571715,,,28,1716,,0,,_customer_address_,customer_address +6291,1.9400121,ca_state,string,default,,,,,1434571715,,,2,51,,0,,_customer_address_,customer_address +6292,4.849962,ca_zip,string,default,,,,,1434571715,,,5,10141,,0,,_customer_address_,customer_address +6293,12.610234325,ca_country,string,default,,,,,1434571715,,,13,2,,0,,_customer_address_,customer_address +6294,,ca_gmt_offset,float,default,,,-5,-10,1434571715,,,,5,,1201179,,_customer_address_,customer_address +6295,8.729921525,ca_location_type,string,default,,,,,1434571715,,,13,4,,0,,_customer_address_,customer_address +6296,,cd_demo_sk,int,default,,,,,1434571729,1920800,1,,1835839,,0,,_customer_demographics_,customer_demographics +6297,1,cd_gender,string,default,,,,,1434571729,,,1,2,,0,,_customer_demographics_,customer_demographics +6298,1,cd_marital_status,string,default,,,,,1434571729,,,1,7,,0,,_customer_demographics_,customer_demographics 
+6299,9.57142857142857,cd_education_status,string,default,,,,,1434571729,,,15,9,,0,,_customer_demographics_,customer_demographics +6300,,cd_purchase_estimate,int,default,,,,,1434571729,10000,500,,24,,0,,_customer_demographics_,customer_demographics +6301,7,cd_credit_rating,string,default,,,,,1434571729,,,9,5,,0,,_customer_demographics_,customer_demographics +6302,,cd_dep_count,int,default,,,,,1434571729,6,0,,5,,0,,_customer_demographics_,customer_demographics +6303,,cd_dep_employed_count,int,default,,,,,1434571729,6,0,,5,,0,,_customer_demographics_,customer_demographics +6304,,cd_dep_college_count,int,default,,,,,1434571729,6,0,,5,,0,,_customer_demographics_,customer_demographics +6305,,d_date_sk,int,default,,,,,1434571736,2488070,2415022,,65332,,0,,_date_dim_,date_dim +6306,16,d_date_id,string,default,,,,,1434571736,,,16,109875,,0,,_date_dim_,date_dim +6307,10,d_date,string,default,,,,,1434571736,,,10,92393,,0,,_date_dim_,date_dim +6308,,d_month_seq,int,default,,,,,1434571736,2400,0,,2764,,0,,_date_dim_,date_dim +6309,,d_week_seq,int,default,,,,,1434571736,10436,1,,13152,,0,,_date_dim_,date_dim +6310,,d_quarter_seq,int,default,,,,,1434571736,801,1,,429,,0,,_date_dim_,date_dim +6311,,d_year,int,default,,,,,1434571736,2100,1900,,112,,0,,_date_dim_,date_dim +6312,,d_dow,int,default,,,,,1434571736,6,0,,5,,0,,_date_dim_,date_dim +6313,,d_moy,int,default,,,,,1434571736,12,1,,13,,0,,_date_dim_,date_dim +6314,,d_dom,int,default,,,,,1434571736,31,1,,23,,0,,_date_dim_,date_dim +6315,,d_qoy,int,default,,,,,1434571736,4,1,,5,,0,,_date_dim_,date_dim +6316,,d_fy_year,int,default,,,,,1434571736,2100,1900,,112,,0,,_date_dim_,date_dim +6317,,d_fy_quarter_seq,int,default,,,,,1434571736,801,1,,429,,0,,_date_dim_,date_dim +6318,,d_fy_week_seq,int,default,,,,,1434571736,10436,1,,13152,,0,,_date_dim_,date_dim +6319,7.14286300976057,d_day_name,string,default,,,,,1434571736,,,9,9,,0,,_date_dim_,date_dim +6320,6,d_quarter_name,string,default,,,,,1434571736,,,6,721,,0,,_date_dim_,date_dim +6321,1,d_holiday,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6322,1,d_weekend,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6323,1,d_following_holiday,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6324,,d_first_dom,int,default,,,,,1434571736,2488070,2415021,,2226,,0,,_date_dim_,date_dim +6325,,d_last_dom,int,default,,,,,1434571736,2488372,2415020,,2535,,0,,_date_dim_,date_dim +6326,,d_same_day_ly,int,default,,,,,1434571736,2487705,2414657,,65332,,0,,_date_dim_,date_dim +6327,,d_same_day_lq,int,default,,,,,1434571736,2487978,2414930,,65332,,0,,_date_dim_,date_dim +6328,1,d_current_day,string,default,,,,,1434571736,,,1,1,,0,,_date_dim_,date_dim +6329,1,d_current_week,string,default,,,,,1434571736,,,1,1,,0,,_date_dim_,date_dim +6330,1,d_current_month,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6331,1,d_current_quarter,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6332,1,d_current_year,string,default,,,,,1434571736,,,1,2,,0,,_date_dim_,date_dim +6333,,hd_demo_sk,int,default,,,,,1434571741,7200,1,,9299,,0,,_household_demographics_,household_demographics +6334,,hd_income_band_sk,int,default,,,,,1434571741,20,1,,18,,0,,_household_demographics_,household_demographics +6335,7.5,hd_buy_potential,string,default,,,,,1434571741,,,10,5,,0,,_household_demographics_,household_demographics +6336,,hd_dep_count,int,default,,,,,1434571741,9,0,,11,,0,,_household_demographics_,household_demographics 
+6337,,hd_vehicle_count,int,default,,,,,1434571741,4,-1,,6,,0,,_household_demographics_,household_demographics +6338,,ib_income_band_sk,int,default,,,,,1434571745,20,1,,18,,0,,_income_band_,income_band +6339,,ib_lower_bound,int,default,,,,,1434571745,190001,0,,21,,0,,_income_band_,income_band +6340,,ib_upper_bound,int,default,,,,,1434571745,200000,10000,,18,,0,,_income_band_,income_band +6341,,i_item_sk,int,default,,,,,1434571764,462000,1,,439501,,0,,_item_,item +6342,16,i_item_id,string,default,,,,,1434571764,,,16,310774,,0,,_item_,item +6343,9.97480519480519,i_rec_start_date,string,default,,,,,1434571764,,,10,5,,0,,_item_,item +6344,5,i_rec_end_date,string,default,,,,,1434571764,,,10,5,,0,,_item_,item +6345,100.203757575758,i_item_desc,string,default,,,,,1434571764,,,200,338901,,0,,_item_,item +6346,,i_current_price,float,default,,,99.9899978637695,0.0900000035762787,1434571764,,,,12060,,1167,,_item_,item +6347,,i_wholesale_cost,float,default,,,89.7399978637695,0.0199999995529652,1434571764,,,,7820,,1120,,_item_,item +6348,,i_brand_id,int,default,,,,,1434571764,10016017,1001001,,633,,1139,,_item_,item +6349,16.1186558441558,i_brand,string,default,,,,,1434571764,,,22,633,,0,,_item_,item +6350,,i_class_id,int,default,,,,,1434571764,16,1,,15,,1117,,_item_,item +6351,7.76902164502164,i_class,string,default,,,,,1434571764,,,15,102,,0,,_item_,item +6352,,i_category_id,int,default,,,,,1434571764,10,1,,11,,1141,,_item_,item +6353,5.88895454545454,i_category,string,default,,,,,1434571764,,,11,10,,0,,_item_,item +6354,,i_manufact_id,int,default,,,,,1434571764,1000,1,,691,,1152,,_item_,item +6355,11.2630519480519,i_manufact,string,default,,,,,1434571764,,,15,1267,,0,,_item_,item +6356,4.32115800865801,i_size,string,default,,,,,1434571764,,,11,9,,0,,_item_,item +6357,19.9492207792208,i_formulation,string,default,,,,,1434571764,,,20,310774,,0,,_item_,item +6358,5.36743939393939,i_color,string,default,,,,,1434571764,,,10,66,,0,,_item_,item +6359,4.17825541125541,i_units,string,default,,,,,1434571764,,,7,18,,0,,_item_,item +6360,6.9825,i_container,string,default,,,,,1434571764,,,7,2,,0,,_item_,item +6361,,i_manager_id,int,default,,,,,1434571764,100,1,,63,,1124,,_item_,item +6362,22.8322532467532,i_product_name,string,default,,,,,1434571764,,,30,522658,,0,,_item_,item +6363,,p_promo_sk,int,default,,,,,1434571768,2300,1,,2764,,0,,_promotion_,promotion +6364,16,p_promo_id,string,default,,,,,1434571768,,,16,2427,,0,,_promotion_,promotion +6365,,p_start_date_sk,int,default,,,,,1434571768,2450915,2450096,,822,,30,,_promotion_,promotion +6366,,p_end_date_sk,int,default,,,,,1434571768,2450970,2450102,,1066,,26,,_promotion_,promotion +6367,,p_item_sk,int,default,,,,,1434571768,461932,614,,2132,,28,,_promotion_,promotion +6368,,p_cost,float,default,,,1000,1000,1434571768,,,,1,,28,,_promotion_,promotion +6369,,p_response_target,int,default,,,,,1434571768,1,1,,1,,33,,_promotion_,promotion +6370,3.94695652173913,p_promo_name,string,default,,,,,1434571768,,,5,11,,0,,_promotion_,promotion +6371,0.989565217391304,p_channel_dmail,string,default,,,,,1434571768,,,1,3,,0,,_promotion_,promotion +6372,0.988260869565217,p_channel_email,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6373,0.989565217391304,p_channel_catalog,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6374,0.988260869565217,p_channel_tv,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6375,0.987826086956522,p_channel_radio,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion 
+6376,0.988695652173913,p_channel_press,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6377,0.989565217391304,p_channel_event,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6378,0.988260869565217,p_channel_demo,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6379,39.2652173913044,p_channel_details,string,default,,,,,1434571768,,,60,2041,,0,,_promotion_,promotion +6380,6.92391304347826,p_purpose,string,default,,,,,1434571768,,,7,2,,0,,_promotion_,promotion +6381,0.986521739130435,p_discount_active,string,default,,,,,1434571768,,,1,2,,0,,_promotion_,promotion +6382,,r_reason_sk,int,default,,,,,1434571773,72,1,,47,,0,,_reason_,reason +6383,16,r_reason_id,string,default,,,,,1434571773,,,16,61,,0,,_reason_,reason +6384,12.6527777777778,r_reason_desc,string,default,,,,,1434571773,,,43,112,,0,,_reason_,reason +6385,,sm_ship_mode_sk,int,default,,,,,1434571776,,,,0,,0,,_ship_mode_,ship_mode +6386,0,sm_ship_mode_id,string,default,,,,,1434571776,,,0,0,,0,,_ship_mode_,ship_mode +6387,0,sm_type,string,default,,,,,1434571776,,,0,0,,0,,_ship_mode_,ship_mode +6388,0,sm_code,string,default,,,,,1434571776,,,0,0,,0,,_ship_mode_,ship_mode +6389,0,sm_carrier,string,default,,,,,1434571776,,,0,0,,0,,_ship_mode_,ship_mode +6390,0,sm_contract,string,default,,,,,1434571776,,,0,0,,0,,_ship_mode_,ship_mode +6391,,t_time_sk,int,default,,,,,1434571782,86399,0,,125124,,0,,_time_dim_,time_dim +6392,16,t_time_id,string,default,,,,,1434571782,,,16,71245,,0,,_time_dim_,time_dim +6393,,t_time,int,default,,,,,1434571782,86399,0,,125124,,0,,_time_dim_,time_dim +6394,,t_hour,int,default,,,,,1434571782,23,0,,19,,0,,_time_dim_,time_dim +6395,,t_minute,int,default,,,,,1434571782,59,0,,37,,0,,_time_dim_,time_dim +6396,,t_second,int,default,,,,,1434571782,59,0,,37,,0,,_time_dim_,time_dim +6397,2,t_am_pm,string,default,,,,,1434571782,,,2,2,,0,,_time_dim_,time_dim +6398,5.33333333333333,t_shift,string,default,,,,,1434571782,,,6,3,,0,,_time_dim_,time_dim +6399,6.91666666666667,t_sub_shift,string,default,,,,,1434571782,,,9,5,,0,,_time_dim_,time_dim +6400,2.875,t_meal_time,string,default,,,,,1434571782,,,9,4,,0,,_time_dim_,time_dim +6401,,w_warehouse_sk,int,default,,,,,1434571790,27,1,,22,,0,,_warehouse_,warehouse +6402,16,w_warehouse_id,string,default,,,,,1434571790,,,16,22,,0,,_warehouse_,warehouse +6403,15.5555555555556,w_warehouse_name,string,default,,,,,1434571790,,,20,21,,0,,_warehouse_,warehouse +6404,,w_warehouse_sq_ft,int,default,,,,,1434571790,977787,73065,,37,,1,,_warehouse_,warehouse +6405,2.77777777777778,w_street_number,string,default,,,,,1434571790,,,3,28,,0,,_warehouse_,warehouse +6406,8.40740740740741,w_street_name,string,default,,,,,1434571790,,,15,23,,0,,_warehouse_,warehouse +6407,3.92592592592593,w_street_type,string,default,,,,,1434571790,,,9,18,,0,,_warehouse_,warehouse +6408,7.44444444444444,w_suite_number,string,default,,,,,1434571790,,,9,21,,0,,_warehouse_,warehouse +6409,9.14814814814815,w_city,string,default,,,,,1434571790,,,15,19,,0,,_warehouse_,warehouse +6410,14.4074074074074,w_county,string,default,,,,,1434571790,,,17,15,,0,,_warehouse_,warehouse +6411,2,w_state,string,default,,,,,1434571790,,,2,15,,0,,_warehouse_,warehouse +6412,5,w_zip,string,default,,,,,1434571790,,,5,17,,0,,_warehouse_,warehouse +6413,13,w_country,string,default,,,,,1434571790,,,13,1,,0,,_warehouse_,warehouse +6414,,w_gmt_offset,float,default,,,-5,-8,1434571790,,,,3,,1,,_warehouse_,warehouse +6415,,wp_web_page_sk,int,default,,,,,1434571794,4602,1,,5529,,0,,_web_page_,web_page 
+6416,16,wp_web_page_id,string,default,,,,,1434571794,,,16,2647,,0,,_web_page_,web_page +6417,9.88700564971751,wp_rec_start_date,string,default,,,,,1434571794,,,10,4,,0,,_web_page_,web_page +6418,5,wp_rec_end_date,string,default,,,,,1434571794,,,10,4,,0,,_web_page_,web_page +6419,,wp_creation_date_sk,int,default,,,,,1434571794,2450815,2450492,,234,,64,,_web_page_,web_page +6420,,wp_access_date_sk,int,default,,,,,1434571794,2452648,2452548,,63,,58,,_web_page_,web_page +6421,0.984789222077358,wp_autogen_flag,string,default,,,,,1434571794,,,1,3,,0,,_web_page_,web_page +6422,,wp_customer_sk,int,default,,,,,1434571794,79895491,33025,,1382,,3263,,_web_page_,web_page +6423,17.8005215123859,wp_url,string,default,,,,,1434571794,,,18,2,,0,,_web_page_,web_page +6424,6.30704041720991,wp_type,string,default,,,,,1434571794,,,9,6,,0,,_web_page_,web_page +6425,,wp_char_count,int,default,,,,,1434571794,8523,303,,3585,,65,,_web_page_,web_page +6426,,wp_link_count,int,default,,,,,1434571794,25,2,,16,,50,,_web_page_,web_page +6427,,wp_image_count,int,default,,,,,1434571794,7,1,,6,,48,,_web_page_,web_page +6428,,wp_max_ad_count,int,default,,,,,1434571794,4,0,,5,,52,,_web_page_,web_page +6429,,web_site_sk,int,default,,,,,1434572787,84,1,,49,,0,,_web_site_,web_site +6430,16,web_site_id,string,default,,,,,1434572787,,,16,53,,0,,_web_site_,web_site +6431,9.76190476190476,web_rec_start_date,string,default,,,,,1434572787,,,10,5,,0,,_web_site_,web_site +6432,5,web_rec_end_date,string,default,,,,,1434572787,,,10,4,,0,,_web_site_,web_site +6433,6.14285714285714,web_name,string,default,,,,,1434572787,,,7,18,,0,,_web_site_,web_site +6434,,web_open_date_sk,int,default,,,,,1434572787,2450807,2450118,,41,,1,,_web_site_,web_site +6435,,web_close_date_sk,int,default,,,,,1434572787,2446218,2440993,,29,,15,,_web_site_,web_site +6436,6.91666666666667,web_class,string,default,,,,,1434572787,,,7,2,,0,,_web_site_,web_site +6437,12.4642857142857,web_manager,string,default,,,,,1434572787,,,19,61,,0,,_web_site_,web_site +6438,,web_mkt_id,int,default,,,,,1434572787,6,1,,5,,1,,_web_site_,web_site +6439,34.7142857142857,web_mkt_class,string,default,,,,,1434572787,,,49,79,,0,,_web_site_,web_site +6440,64.6190476190476,web_mkt_desc,string,default,,,,,1434572787,,,98,61,,0,,_web_site_,web_site +6441,12.7738095238095,web_market_manager,string,default,,,,,1434572787,,,18,58,,0,,_web_site_,web_site +6442,,web_company_id,int,default,,,,,1434572787,6,1,,5,,0,,_web_site_,web_site +6443,3.79761904761905,web_company_name,string,default,,,,,1434572787,,,5,7,,0,,_web_site_,web_site +6444,2.89285714285714,web_street_number,string,default,,,,,1434572787,,,3,51,,0,,_web_site_,web_site +6445,8.94047619047619,web_street_name,string,default,,,,,1434572787,,,16,102,,0,,_web_site_,web_site +6446,3.8452380952381,web_street_type,string,default,,,,,1434572787,,,9,23,,0,,_web_site_,web_site +6447,7.90476190476191,web_suite_number,string,default,,,,,1434572787,,,9,58,,0,,_web_site_,web_site +6448,8.73809523809524,web_city,string,default,,,,,1434572787,,,15,37,,0,,_web_site_,web_site +6449,13.8333333333333,web_county,string,default,,,,,1434572787,,,22,69,,0,,_web_site_,web_site +6450,2,web_state,string,default,,,,,1434572787,,,2,33,,0,,_web_site_,web_site +6451,5,web_zip,string,default,,,,,1434572787,,,5,56,,0,,_web_site_,web_site +6452,12.8452380952381,web_country,string,default,,,,,1434572787,,,13,2,,0,,_web_site_,web_site +6453,,web_gmt_offset,float,default,,,-5,-8,1434572787,,,,3,,1,,_web_site_,web_site 
+6454,,web_tax_percentage,float,default,,,0.119999997317791,0,1434572787,,,,17,,1,,_web_site_,web_site diff --git a/data/files/tpcds-perf/metastore_export/sql/TAB_COL_STATS.sql b/data/files/tpcds-perf/metastore_export/sql/TAB_COL_STATS.sql new file mode 100644 index 0000000..4814bc0 --- /dev/null +++ b/data/files/tpcds-perf/metastore_export/sql/TAB_COL_STATS.sql @@ -0,0 +1,5 @@ +DROP TABLE APP.TAB_COL_STATS; +CREATE TABLE APP.TAB_COL_STATS (CS_ID BIGINT NOT NULL, AVG_COL_LEN DOUBLE, COLUMN_NAME varchar(128), COLUMN_TYPE varchar(128) NOT NULL, DB_NAME varchar(128) NOT NULL, BIG_DECIMAL_HIGH_VALUE varchar(255), BIG_DECIMAL_LOW_VALUE varchar(255), DOUBLE_HIGH_VALUE double, DOUBLE_LOW_VALUE double, LAST_ANALYZED BIGINT NOT NULL, LONG_HIGH_VALUE BIGINT, LONG_LOW_VALUE BIGINT, MAX_COL_LEN BIGINT, NUM_DISTINCTS bigint, NUM_FALSES bigint, NUM_NULLS bigint NOT NULL, NUM_TRUES bigint, TBL_ID bigint, TABLE_NAME varchar(128) NOT NULL); +DROP TABLE "APP"."TABLE_PARAMS"; +CREATE TABLE "APP"."TABLE_PARAMS" ("TBL_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + diff --git a/data/scripts/q_perf_test_init.sql b/data/scripts/q_perf_test_init.sql new file mode 100644 index 0000000..d27215b --- /dev/null +++ b/data/scripts/q_perf_test_init.sql @@ -0,0 +1,616 @@ +drop table if exists call_center; +create external table call_center +( + cc_call_center_sk int, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk int, + cc_open_date_sk int, + cc_name string, + cc_class string, + cc_employees int, + cc_sq_ft int, + cc_hours string, + cc_manager string, + cc_mkt_id int, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division int, + cc_division_name string, + cc_company int, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset decimal(5,2), + cc_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + +drop table if exists catalog_page; +create external table catalog_page +( + cp_catalog_page_sk int, + cp_catalog_page_id string, + cp_start_date_sk int, + cp_end_date_sk int, + cp_department string, + cp_catalog_number int, + cp_catalog_page_number int, + cp_description string, + cp_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists catalog_returns; +create external table catalog_returns +( + cr_returned_date_sk int, + cr_returned_time_sk int, + cr_item_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_order_number int, + cr_return_quantity int, + cr_return_amount decimal(7,2), + cr_return_tax decimal(7,2), + cr_return_amt_inc_tax decimal(7,2), + cr_fee decimal(7,2), + cr_return_ship_cost decimal(7,2), + cr_refunded_cash decimal(7,2), + cr_reversed_charge decimal(7,2), + cr_store_credit decimal(7,2), + cr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table 
if exists catalog_sales; +create external table catalog_sales +( + cs_sold_date_sk int, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_item_sk int, + cs_promo_sk int, + cs_order_number int, + cs_quantity int, + cs_wholesale_cost decimal(7,2), + cs_list_price decimal(7,2), + cs_sales_price decimal(7,2), + cs_ext_discount_amt decimal(7,2), + cs_ext_sales_price decimal(7,2), + cs_ext_wholesale_cost decimal(7,2), + cs_ext_list_price decimal(7,2), + cs_ext_tax decimal(7,2), + cs_coupon_amt decimal(7,2), + cs_ext_ship_cost decimal(7,2), + cs_net_paid decimal(7,2), + cs_net_paid_inc_tax decimal(7,2), + cs_net_paid_inc_ship decimal(7,2), + cs_net_paid_inc_ship_tax decimal(7,2), + cs_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists customer; +create external table customer +( + c_customer_sk int, + c_customer_id string, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day int, + c_birth_month int, + c_birth_year int, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists customer_address; +create external table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset decimal(5,2), + ca_location_type string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists customer_demographics; +create external table customer_demographics +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists date_dim; +create external table date_dim +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists household_demographics; +create external table household_demographics +( + hd_demo_sk int, + hd_income_band_sk int, + hd_buy_potential string, + 
hd_dep_count int, + hd_vehicle_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists income_band; +create external table income_band +( + ib_income_band_sk int, + ib_lower_bound int, + ib_upper_bound int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists inventory; +create external table inventory +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists item; +create external table item +( + i_item_sk int, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price decimal(7,2), + i_wholesale_cost decimal(7,2), + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists promotion; +create external table promotion +( + p_promo_sk int, + p_promo_id string, + p_start_date_sk int, + p_end_date_sk int, + p_item_sk int, + p_cost decimal(15,2), + p_response_target int, + p_promo_name string, + p_channel_dmail string, + p_channel_email string, + p_channel_catalog string, + p_channel_tv string, + p_channel_radio string, + p_channel_press string, + p_channel_event string, + p_channel_demo string, + p_channel_details string, + p_purpose string, + p_discount_active string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists reason; +create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists ship_mode; +create external table ship_mode +( + sm_ship_mode_sk int, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists store; +create external table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset decimal(5,2), + s_tax_precentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists store_returns; +create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk 
int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists store_sales; +create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists time_dim; +create external table time_dim +( + t_time_sk int, + t_time_id string, + t_time int, + t_hour int, + t_minute int, + t_second int, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists warehouse; +create external table warehouse +( + w_warehouse_sk int, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft int, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists web_page; +create external table web_page +( + wp_web_page_sk int, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag string, + wp_customer_sk int, + wp_url string, + wp_type string, + wp_char_count int, + wp_link_count int, + wp_image_count int, + wp_max_ad_count int +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists web_returns; +create external table web_returns +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt decimal(7,2), + wr_return_tax decimal(7,2), + wr_return_amt_inc_tax decimal(7,2), + wr_fee decimal(7,2), + wr_return_ship_cost decimal(7,2), + wr_refunded_cash decimal(7,2), + wr_reversed_charge decimal(7,2), + wr_account_credit decimal(7,2), + wr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists web_sales; +create external table web_sales +( + ws_sold_date_sk 
int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost decimal(7,2), + ws_list_price decimal(7,2), + ws_sales_price decimal(7,2), + ws_ext_discount_amt decimal(7,2), + ws_ext_sales_price decimal(7,2), + ws_ext_wholesale_cost decimal(7,2), + ws_ext_list_price decimal(7,2), + ws_ext_tax decimal(7,2), + ws_coupon_amt decimal(7,2), + ws_ext_ship_cost decimal(7,2), + ws_net_paid decimal(7,2), + ws_net_paid_inc_tax decimal(7,2), + ws_net_paid_inc_ship decimal(7,2), + ws_net_paid_inc_ship_tax decimal(7,2), + ws_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + +drop table if exists web_site; +create external table web_site +( + web_site_sk int, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset decimal(5,2), + web_tax_percentage decimal(5,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); + + diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml index 8db6fb0..76ce591 100644 --- a/itests/qtest/pom.xml +++ b/itests/qtest/pom.xml @@ -396,6 +396,7 @@ + @@ -441,6 +442,23 @@ initScript="${initScript}" cleanupScript="q_test_cleanup.sql"/> + + + 500) and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_qoy = 2 and date_dim.d_year = 2000 group by ca_zip order by ca_zip limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query17.q b/ql/src/test/queries/clientpositive/perf/query17.q new file mode 100644 index 0000000..bb8a33b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query17.q @@ -0,0 +1 @@ +explain select i_item_id ,i_item_desc ,s_state ,count(ss_quantity) as store_sales_quantitycount ,avg(ss_quantity) as store_sales_quantityave ,stddev_samp(ss_quantity) as store_sales_quantitystdev ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov ,count(sr_return_quantity) as_store_returns_quantitycount ,avg(sr_return_quantity) as_store_returns_quantityave ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_quarter_name = '2000Q1' and d1.d_date_sk = store_sales.ss_sold_date_sk and item.i_item_sk = store_sales.ss_item_sk and store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_customer_sk = 
store_returns.sr_customer_sk and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_returns.sr_returned_date_sk = d2.d_date_sk and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk and store_returns.sr_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_sold_date_sk = d3.d_date_sk and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') group by i_item_id ,i_item_desc ,s_state order by i_item_id ,i_item_desc ,s_state limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query18.q b/ql/src/test/queries/clientpositive/perf/query18.q new file mode 100644 index 0000000..e6061f5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query18.q @@ -0,0 +1 @@ +explain select i_item_id, ca_country, ca_state, ca_county, avg( cast(cs_quantity as decimal(12,2))) agg1, avg( cast(cs_list_price as decimal(12,2))) agg2, avg( cast(cs_coupon_amt as decimal(12,2))) agg3, avg( cast(cs_sales_price as decimal(12,2))) agg4, avg( cast(cs_net_profit as decimal(12,2))) agg5, avg( cast(c_birth_year as decimal(12,2))) agg6, avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 from catalog_sales, date_dim, customer_demographics cd1, item, customer, customer_address, customer_demographics cd2 where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and cd1.cd_gender = 'M' and cd1.cd_education_status = 'College' and customer.c_current_cdemo_sk = cd2.cd_demo_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and c_birth_month in (9,5,12,4,1,10) and d_year = 2001 and ca_state in ('ND','WI','AL' ,'NC','OK','MS','TN') group by i_item_id, ca_country, ca_state, ca_county with rollup order by ca_country, ca_state, ca_county, i_item_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query19.q b/ql/src/test/queries/clientpositive/perf/query19.q new file mode 100644 index 0000000..cfb6218 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query19.q @@ -0,0 +1 @@ +explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=7 and d_moy=11 and d_year=1999 and store_sales.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and substr(ca_zip,1,5) <> substr(s_zip,1,5) and store_sales.ss_store_sk = store.s_store_sk group by i_brand ,i_brand_id ,i_manufact_id ,i_manufact order by ext_price desc ,i_brand ,i_brand_id ,i_manufact_id ,i_manufact limit 100 ; diff --git a/ql/src/test/queries/clientpositive/perf/query20.q b/ql/src/test/queries/clientpositive/perf/query20.q new file mode 100644 index 0000000..d4ea10d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query20.q @@ -0,0 +1 @@ +explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(cs_ext_sales_price) as itemrevenue ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over (partition by i_class) as revenueratio from catalog_sales ,item ,date_dim where catalog_sales.cs_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and d_date 
between '2001-01-12' and '2001-02-11' group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query22.q b/ql/src/test/queries/clientpositive/perf/query22.q new file mode 100644 index 0000000..17cece2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query22.q @@ -0,0 +1 @@ +explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query25.q b/ql/src/test/queries/clientpositive/perf/query25.q new file mode 100644 index 0000000..80185ab --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query25.q @@ -0,0 +1 @@ +explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_net_profit) as store_sales_profit ,sum(sr_net_loss) as store_returns_loss ,sum(cs_net_profit) as catalog_sales_profit from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 4 and d1.d_year = 1998 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 4 and 10 and d2.d_year = 1998 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_moy between 4 and 10 and d3.d_year = 1998 group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc ,s_store_id ,s_store_name limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query26.q b/ql/src/test/queries/clientpositive/perf/query26.q new file mode 100644 index 0000000..0e81120 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query26.q @@ -0,0 +1 @@ +explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, avg(cs_coupon_amt) agg3, avg(cs_sales_price) agg4 from catalog_sales, customer_demographics, date_dim, item, promotion where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk and catalog_sales.cs_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query27.q b/ql/src/test/queries/clientpositive/perf/query27.q new file mode 100644 index 0000000..58be664 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query27.q @@ -0,0 +1 @@ +explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk 
and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query29.q b/ql/src/test/queries/clientpositive/perf/query29.q new file mode 100644 index 0000000..1634b1f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query29.q @@ -0,0 +1 @@ +explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_quantity) as store_sales_quantity ,sum(sr_return_quantity) as store_returns_quantity ,sum(cs_quantity) as catalog_sales_quantity from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 2 and d1.d_year = 2000 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 2 and 2 + 3 and d2.d_year = 2000 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_year in (2000,2000+1,2000+2) group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc ,s_store_id ,s_store_name limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query3.q b/ql/src/test/queries/clientpositive/perf/query3.q new file mode 100644 index 0000000..47dfac5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query3.q @@ -0,0 +1 @@ +explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) sum_agg from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 436 and dt.d_moy=12 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,sum_agg desc ,brand_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query31.q b/ql/src/test/queries/clientpositive/perf/query31.q new file mode 100644 index 0000000..147ad15 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query31.q @@ -0,0 +1 @@ +explain with ss as (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales from store_sales,date_dim,customer_address where ss_sold_date_sk = d_date_sk and ss_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year), ws as (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales from web_sales,date_dim,customer_address where ws_sold_date_sk = d_date_sk and ws_bill_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year) select ss1.ca_county ,ss1.d_year ,ws2.web_sales/ws1.web_sales web_q1_q2_increase ,ss2.store_sales/ss1.store_sales store_q1_q2_increase ,ws3.web_sales/ws2.web_sales web_q2_q3_increase ,ss3.store_sales/ss2.store_sales store_q2_q3_increase from ss ss1 ,ss ss2 ,ss ss3 ,ws ws1 ,ws ws2 ,ws ws3 where ss1.d_qoy = 1 and ss1.d_year = 1998 and ss1.ca_county = ss2.ca_county and ss2.d_qoy = 2 and ss2.d_year = 1998 and ss2.ca_county = ss3.ca_county and ss3.d_qoy = 3 and ss3.d_year = 1998 and ss1.ca_county = ws1.ca_county and ws1.d_qoy = 1 and ws1.d_year = 1998 and ws1.ca_county = ws2.ca_county and ws2.d_qoy = 2 and ws2.d_year = 1998 and ws1.ca_county = ws3.ca_county and ws3.d_qoy = 3 and ws3.d_year =1998 and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end > case when 
ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end order by web_q1_q2_increase; diff --git a/ql/src/test/queries/clientpositive/perf/query34.q b/ql/src/test/queries/clientpositive/perf/query34.q new file mode 100644 index 0000000..5fc82cb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query34.q @@ -0,0 +1 @@ +explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) and (household_demographics.hd_buy_potential = '1001-5000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and (case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end) > 1.2 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County', 'Orange County','Appanoose County','Franklin Parish','Tehama County') group by ss_ticket_number,ss_customer_sk) dn,customer where dn.ss_customer_sk = customer.c_customer_sk and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc; diff --git a/ql/src/test/queries/clientpositive/perf/query39.q b/ql/src/test/queries/clientpositive/perf/query39.q new file mode 100644 index 0000000..abf2bdd --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query39.q @@ -0,0 +1 @@ +explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov ; with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov 
from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 and inv1.cov > 1.5 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov ; diff --git a/ql/src/test/queries/clientpositive/perf/query40.q b/ql/src/test/queries/clientpositive/perf/query40.q new file mode 100644 index 0000000..2a6c273 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query40.q @@ -0,0 +1 @@ +explain select w_state ,i_item_id ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after from catalog_sales left outer join catalog_returns on (catalog_sales.cs_order_number = catalog_returns.cr_order_number and catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) ,warehouse ,item ,date_dim where i_current_price between 0.99 and 1.49 and item.i_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_date between '1998-03-09' and '1998-05-08' group by w_state,i_item_id order by w_state,i_item_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query42.q b/ql/src/test/queries/clientpositive/perf/query42.q new file mode 100644 index 0000000..c4b115b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query42.q @@ -0,0 +1 @@ +explain select dt.d_year ,item.i_category_id ,item.i_category ,sum(ss_ext_sales_price) as s from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_category_id ,item.i_category order by s desc,dt.d_year ,item.i_category_id ,item.i_category limit 100 ; diff --git a/ql/src/test/queries/clientpositive/perf/query43.q b/ql/src/test/queries/clientpositive/perf/query43.q new file mode 100644 index 0000000..f1c22e8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query43.q @@ -0,0 +1 @@ +explain select s_store_name, s_store_id, sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales from date_dim, store_sales, store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk and s_gmt_offset = -6 and d_year = 1998 group by s_store_name, s_store_id order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query45.q b/ql/src/test/queries/clientpositive/perf/query45.q new file mode 100644 index 0000000..0b34bfd --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query45.q @@ -0,0 +1 @@ +explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales JOIN customer ON 
web_sales.ws_bill_customer_sk = customer.c_customer_sk JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN item ON web_sales.ws_item_sk = item.i_item_sk where ( item.i_item_id in (select i_item_id from item i2 where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) ) ) and d_qoy = 2 and d_year = 2000 group by ca_zip, ca_county order by ca_zip, ca_county limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query46.q b/ql/src/test/queries/clientpositive/perf/query46.q new file mode 100644 index 0000000..3e8711f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query46.q @@ -0,0 +1 @@ +explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,amt,profit from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_dow in (6,0) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem','Clinton','Clifton','Springfield') group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query48.q b/ql/src/test/queries/clientpositive/perf/query48.q new file mode 100644 index 0000000..27fa4db --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query48.q @@ -0,0 +1 @@ +explain select sum (ss_quantity) from store_sales, store, customer_demographics, customer_address, date_dim where store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and ( ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 100.00 and 150.00 ) or ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 50.00 and 100.00 ) or ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 150.00 and 200.00 ) ) and ( ( store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ss_net_profit between 0 and 2000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ss_net_profit between 150 and 3000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ss_net_profit between 50 and 25000 ) ) ; diff --git a/ql/src/test/queries/clientpositive/perf/query50.q b/ql/src/test/queries/clientpositive/perf/query50.q new file mode 100644 index 0000000..b347f6f --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/perf/query50.q @@ -0,0 +1 @@ +explain select s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as 30days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as 3160days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as 6190days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as 91120days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as 120days from store_sales ,store_returns ,store ,date_dim d1 ,date_dim d2 where d2.d_year = 2000 and d2.d_moy = 9 and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_sold_date_sk = d1.d_date_sk and sr_returned_date_sk = d2.d_date_sk and store_sales.ss_customer_sk = store_returns.sr_customer_sk and store_sales.ss_store_sk = store.s_store_sk group by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip order by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query52.q b/ql/src/test/queries/clientpositive/perf/query52.q new file mode 100644 index 0000000..24d0ac3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query52.q @@ -0,0 +1 @@ +explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) ext_price from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,ext_price desc ,brand_id limit 100 ; diff --git a/ql/src/test/queries/clientpositive/perf/query54.q b/ql/src/test/queries/clientpositive/perf/query54.q new file mode 100644 index 0000000..b1a3e83 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query54.q @@ -0,0 +1 @@ +explain with my_customers as ( select c_customer_sk , c_current_addr_sk from ( select cs_sold_date_sk sold_date_sk, cs_bill_customer_sk customer_sk, cs_item_sk item_sk from catalog_sales union all select ws_sold_date_sk sold_date_sk, ws_bill_customer_sk customer_sk, ws_item_sk item_sk from web_sales ) cs_or_ws_sales, item, date_dim, customer where sold_date_sk = d_date_sk and item_sk = i_item_sk and i_category = 'Jewelry' and i_class = 'football' and c_customer_sk = cs_or_ws_sales.customer_sk and d_moy = 3 and d_year = 2000 group by c_customer_sk , c_current_addr_sk ) , my_revenue as ( select c_customer_sk, sum(ss_ext_sales_price) as revenue from my_customers, store_sales, customer_address, store, date_dim where c_current_addr_sk = ca_address_sk and ca_county = s_county and ca_state = s_state and ss_sold_date_sk = d_date_sk and c_customer_sk = ss_customer_sk and d_month_seq between (1203) and (1205) group by c_customer_sk ) , segments as (select cast((revenue/50) as int) as segment from my_revenue ) select segment, count(*) as num_customers, segment*50 as segment_base from segments group by segment order by segment, num_customers 
limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query55.q b/ql/src/test/queries/clientpositive/perf/query55.q new file mode 100644 index 0000000..644ba1a --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query55.q @@ -0,0 +1 @@ +explain select i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=36 and d_moy=12 and d_year=2001 group by i_brand, i_brand_id order by ext_price desc, i_brand_id limit 100 ; diff --git a/ql/src/test/queries/clientpositive/perf/query64.q b/ql/src/test/queries/clientpositive/perf/query64.q new file mode 100644 index 0000000..7afb7d2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query64.q @@ -0,0 +1 @@ +explain select cs1.product_name ,cs1.store_name ,cs1.store_zip ,cs1.b_street_number ,cs1.b_streen_name ,cs1.b_city ,cs1.b_zip ,cs1.c_street_number ,cs1.c_street_name ,cs1.c_city ,cs1.c_zip ,cs1.syear ,cs1.cnt ,cs1.s1 ,cs1.s2 ,cs1.s3 ,cs2.s1 ,cs2.s2 ,cs2.s3 ,cs2.syear ,cs2.cnt from (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip 
,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs1 JOIN (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs2 ON cs1.item_sk=cs2.item_sk where cs1.syear = 2000 and cs2.syear = 2000 + 1 and cs2.cnt <= cs1.cnt and cs1.store_name = cs2.store_name and cs1.store_zip = cs2.store_zip order by cs1.product_name ,cs1.store_name ,cs2.cnt; diff --git a/ql/src/test/queries/clientpositive/perf/query68.q b/ql/src/test/queries/clientpositive/perf/query68.q new file mode 100644 index 0000000..34f87c2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query68.q @@ -0,0 +1 @@ +explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,extended_price ,extended_tax ,list_price from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_ext_sales_price) extended_price ,sum(ss_ext_list_price) list_price ,sum(ss_ext_tax) extended_tax from store_sales 
,date_dim ,store ,household_demographics ,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem') group by ss_ticket_number ,ss_customer_sk ,ss_addr_sk,ca_city) dn ,customer ,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,ss_ticket_number limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query7.q b/ql/src/test/queries/clientpositive/perf/query7.q new file mode 100644 index 0000000..39c4383 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query7.q @@ -0,0 +1 @@ +explain select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and store_sales.ss_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query70.q b/ql/src/test/queries/clientpositive/perf/query70.q new file mode 100644 index 0000000..df74bfb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query70.q @@ -0,0 +1 @@ +explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query71.q b/ql/src/test/queries/clientpositive/perf/query71.q new file mode 100644 index 0000000..c7c4e9f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query71.q @@ -0,0 +1 @@ +explain select i_brand_id brand_id, i_brand brand,t_hour,t_minute, sum(ext_price) ext_price from item JOIN (select ws_ext_sales_price as ext_price, ws_sold_date_sk as sold_date_sk, ws_item_sk as sold_item_sk, ws_sold_time_sk as time_sk from web_sales,date_dim where date_dim.d_date_sk = web_sales.ws_sold_date_sk and d_moy=12 and d_year=2001 union all select cs_ext_sales_price as ext_price, cs_sold_date_sk as sold_date_sk, cs_item_sk as sold_item_sk, cs_sold_time_sk as time_sk 
from catalog_sales,date_dim where date_dim.d_date_sk = catalog_sales.cs_sold_date_sk and d_moy=12 and d_year=2001 union all select ss_ext_sales_price as ext_price, ss_sold_date_sk as sold_date_sk, ss_item_sk as sold_item_sk, ss_sold_time_sk as time_sk from store_sales,date_dim where date_dim.d_date_sk = store_sales.ss_sold_date_sk and d_moy=12 and d_year=2001 ) tmp ON tmp.sold_item_sk = item.i_item_sk JOIN time_dim ON tmp.time_sk = time_dim.t_time_sk where i_manager_id=1 and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') group by i_brand, i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id ; diff --git a/ql/src/test/queries/clientpositive/perf/query72.q b/ql/src/test/queries/clientpositive/perf/query72.q new file mode 100644 index 0000000..ee4c3d0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query72.q @@ -0,0 +1 @@ +explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq ,count(case when p_promo_sk is null then 1 else 0 end) no_promo ,count(case when p_promo_sk is not null then 1 else 0 end) promo ,count(*) total_cnt from catalog_sales join inventory on (catalog_sales.cs_item_sk = inventory.inv_item_sk) join warehouse on (warehouse.w_warehouse_sk=inventory.inv_warehouse_sk) join item on (item.i_item_sk = catalog_sales.cs_item_sk) join customer_demographics on (catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk) join household_demographics on (catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk) join date_dim d1 on (catalog_sales.cs_sold_date_sk = d1.d_date_sk) join date_dim d2 on (inventory.inv_date_sk = d2.d_date_sk) join date_dim d3 on (catalog_sales.cs_ship_date_sk = d3.d_date_sk) left outer join promotion on (catalog_sales.cs_promo_sk=promotion.p_promo_sk) left outer join catalog_returns on (catalog_returns.cr_item_sk = catalog_sales.cs_item_sk and catalog_returns.cr_order_number = catalog_sales.cs_order_number) where d1.d_week_seq = d2.d_week_seq and inv_quantity_on_hand < cs_quantity and d3.d_date > d1.d_date + 5 and hd_buy_potential = '1001-5000' and d1.d_year = 2001 and hd_buy_potential = '1001-5000' and cd_marital_status = 'M' and d1.d_year = 2001 group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query73.q b/ql/src/test/queries/clientpositive/perf/query73.q new file mode 100644 index 0000000..7465e6d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query73.q @@ -0,0 +1 @@ +explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_buy_potential = '1001-5000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County') group by ss_ticket_number,ss_customer_sk) dj,customer where dj.ss_customer_sk = customer.c_customer_sk and cnt between 1 and 5 order by cnt 
desc; diff --git a/ql/src/test/queries/clientpositive/perf/query75.q b/ql/src/test/queries/clientpositive/perf/query75.q new file mode 100644 index 0000000..a815c9e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query75.q @@ -0,0 +1 @@ +explain WITH all_sales AS ( SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,SUM(sales_cnt) AS sales_cnt ,SUM(sales_amt) AS sales_amt FROM (SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk JOIN date_dim ON d_date_sk=cs_sold_date_sk LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number AND cs_item_sk=cr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk=ss_item_sk JOIN date_dim ON d_date_sk=ss_sold_date_sk LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number AND ss_item_sk=sr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk=ws_item_sk JOIN date_dim ON d_date_sk=ws_sold_date_sk LEFT JOIN web_returns ON (ws_order_number=wr_order_number AND ws_item_sk=wr_item_sk) WHERE i_category='Sports') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year ,curr_yr.d_year AS year ,curr_yr.i_brand_id ,curr_yr.i_class_id ,curr_yr.i_category_id ,curr_yr.i_manufact_id ,prev_yr.sales_cnt AS prev_yr_cnt ,curr_yr.sales_cnt AS curr_yr_cnt ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id=prev_yr.i_brand_id AND curr_yr.i_class_id=prev_yr.i_class_id AND curr_yr.i_category_id=prev_yr.i_category_id AND curr_yr.i_manufact_id=prev_yr.i_manufact_id AND curr_yr.d_year=2002 AND prev_yr.d_year=2002-1 AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 ORDER BY sales_cnt_diff limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query76.q b/ql/src/test/queries/clientpositive/perf/query76.q new file mode 100644 index 0000000..1894cf4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query76.q @@ -0,0 +1 @@ +explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_addr_sk IS NULL AND store_sales.ss_sold_date_sk=date_dim.d_date_sk AND store_sales.ss_item_sk=item.i_item_sk UNION ALL SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_web_page_sk IS NULL AND web_sales.ws_sold_date_sk=date_dim.d_date_sk AND web_sales.ws_item_sk=item.i_item_sk UNION ALL SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_warehouse_sk IS NULL AND 
catalog_sales.cs_sold_date_sk=date_dim.d_date_sk AND catalog_sales.cs_item_sk=item.i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query79.q b/ql/src/test/queries/clientpositive/perf/query79.q new file mode 100644 index 0000000..d104602 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query79.q @@ -0,0 +1 @@ +explain select c_last_name,c_first_name,substr(s_city,1,30) sub,ss_ticket_number,amt,profit from (select ss_ticket_number ,ss_customer_sk ,store.s_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) and date_dim.d_dow = 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_number_employees between 200 and 295 group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer where ms.ss_customer_sk = customer.c_customer_sk order by c_last_name,c_first_name,sub, profit limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query80.q b/ql/src/test/queries/clientpositive/perf/query80.q new file mode 100644 index 0000000..5ded3c5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query80.q @@ -0,0 +1 @@ +explain with ssr as (select s_store_id as store_id, sum(ss_ext_sales_price) as sales, sum(coalesce(sr_return_amt, 0)) as returns, sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit from store_sales left outer join store_returns on (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion where ss_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ss_store_sk = s_store_sk and ss_item_sk = i_item_sk and i_current_price > 50 and ss_promo_sk = p_promo_sk and p_channel_tv = 'N' group by s_store_id) , csr as (select cp_catalog_page_id as catalog_page_id, sum(cs_ext_sales_price) as sales, sum(coalesce(cr_return_amount, 0)) as returns, sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit from catalog_sales left outer join catalog_returns on (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion where cs_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and cs_catalog_page_sk = cp_catalog_page_sk and cs_item_sk = i_item_sk and i_current_price > 50 and cs_promo_sk = p_promo_sk and p_channel_tv = 'N' group by cp_catalog_page_id) , wsr as (select web_site_id, sum(ws_ext_sales_price) as sales, sum(coalesce(wr_return_amt, 0)) as returns, sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit from web_sales left outer join web_returns on (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), date_dim, web_site, item, promotion where ws_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ws_web_site_sk = web_site_sk and ws_item_sk = i_item_sk and i_current_price > 50 and ws_promo_sk = p_promo_sk and p_channel_tv = 'N' group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , concat('store', store_id) as id , sales , returns , profit from ssr 
union all select 'catalog channel' as channel , concat('catalog_page', catalog_page_id) as id , sales , returns , profit from csr union all select 'web channel' as channel , concat('web_site', web_site_id) as id , sales , returns , profit from wsr ) x group by channel, id with rollup order by channel ,id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query82.q b/ql/src/test/queries/clientpositive/perf/query82.q new file mode 100644 index 0000000..aec30ea --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query82.q @@ -0,0 +1 @@ +explain select i_item_id ,i_item_desc ,i_current_price from item, inventory, date_dim, store_sales where i_current_price between 30 and 30+30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk and d_date between '2002-05-30' and '2002-07-30' and i_manufact_id in (437,129,727,663) and inv_quantity_on_hand between 100 and 500 and ss_item_sk = i_item_sk group by i_item_id,i_item_desc,i_current_price order by i_item_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query84.q b/ql/src/test/queries/clientpositive/perf/query84.q new file mode 100644 index 0000000..bcc2f4e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query84.q @@ -0,0 +1 @@ +explain select c_customer_id as customer_id ,concat(c_last_name, ', ', c_first_name) as customername from customer ,customer_address ,customer_demographics ,household_demographics ,income_band ,store_returns where ca_city = 'Hopewell' and customer.c_current_addr_sk = customer_address.ca_address_sk and ib_lower_bound >= 32287 and ib_upper_bound <= 32287 + 50000 and income_band.ib_income_band_sk = household_demographics.hd_income_band_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and store_returns.sr_cdemo_sk = customer_demographics.cd_demo_sk order by customer_id limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query85.q b/ql/src/test/queries/clientpositive/perf/query85.q new file mode 100644 index 0000000..b1e2b64 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query85.q @@ -0,0 +1 @@ +explain select substr(r_reason_desc,1,20) as r ,avg(ws_quantity) wq ,avg(wr_refunded_cash) ref ,avg(wr_fee) fee from web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason where web_sales.ws_web_page_sk = web_page.wp_web_page_sk and web_sales.ws_item_sk = web_returns.wr_item_sk and web_sales.ws_order_number = web_returns.wr_order_number and web_sales.ws_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk and cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk and customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk and reason.r_reason_sk = web_returns.wr_reason_sk and ( ( cd1.cd_marital_status = 'M' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = '4 yr Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 100.00 and 150.00 ) or ( cd1.cd_marital_status = 'D' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Primary' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 50.00 and 100.00 ) or ( cd1.cd_marital_status = 'U' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Advanced Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 150.00 and 200.00 ) ) and ( ( 
ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ws_net_profit between 100 and 200 ) or ( ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ws_net_profit between 150 and 300 ) or ( ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ws_net_profit between 50 and 250 ) ) group by r_reason_desc order by r, wq, ref, fee limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query87.q b/ql/src/test/queries/clientpositive/perf/query87.q new file mode 100644 index 0000000..6fd059f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query87.q @@ -0,0 +1 @@ +explain select count(*) from (select distinct c_last_name as l1, c_first_name as f1, d_date as d1 from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t1 LEFT OUTER JOIN ( select distinct c_last_name as l2, c_first_name as f2, d_date as d2 from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk JOIN customer ON catalog_sales.cs_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t2 ON t1.l1 = t2.l2 and t1.f1 = t2.f2 and t1.d1 = t2.d2 LEFT OUTER JOIN (select distinct c_last_name as l3, c_first_name as f3, d_date as d3 from web_sales JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t3 ON t1.l1 = t3.l3 and t1.f1 = t3.f3 and t1.d1 = t3.d3 WHERE l2 is null and l3 is null ; diff --git a/ql/src/test/queries/clientpositive/perf/query90.q b/ql/src/test/queries/clientpositive/perf/query90.q new file mode 100644 index 0000000..e067de9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query90.q @@ -0,0 +1 @@ +explain select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 6 and 6+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 14 and 14+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query91.q b/ql/src/test/queries/clientpositive/perf/query91.q new file mode 100644 index 0000000..3b1dd20 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query91.q @@ -0,0 +1 @@ +explain select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = 
customer.c_current_hdemo_sk and customer_address.ca_address_sk = customer.c_current_addr_sk and d_year = 1999 and d_moy = 11 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by Returns_Loss desc; diff --git a/ql/src/test/queries/clientpositive/perf/query92.q b/ql/src/test/queries/clientpositive/perf/query92.q new file mode 100644 index 0000000..625e99f --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query92.q @@ -0,0 +1 @@ +explain SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) as store_only, sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) as catalog_only, sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) as store_and_catalog FROM (SELECT ss.ss_customer_sk as customer_sk, ss.ss_item_sk as item_sk FROM store_sales ss JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) WHERE d1.d_month_seq >= 1206 and d1.d_month_seq <= 1217 GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, cs.cs_item_sk as item_sk FROM catalog_sales cs JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) WHERE d2.d_month_seq >= 1206 and d2.d_month_seq <= 1217 GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci ON (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk); diff --git a/ql/src/test/queries/clientpositive/perf/query93.q b/ql/src/test/queries/clientpositive/perf/query93.q new file mode 100644 index 0000000..b60b041 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query93.q @@ -0,0 +1 @@ +explain select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk and store_returns.sr_ticket_number = store_sales.ss_ticket_number) ,reason where store_returns.sr_reason_sk = reason.r_reason_sk and r_reason_desc = 'Did not like the warranty') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query94.q b/ql/src/test/queries/clientpositive/perf/query94.q new file mode 100644 index 0000000..f9f4bc1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query94.q @@ -0,0 +1 @@ +explain SELECT count(distinct ws_order_number) as order_count, sum(ws_ext_ship_cost) as total_shipping_cost, sum(ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) WHERE d.d_date between '1999-05-01' and '1999-07-01' and ca.ca_state = 'TX' and s.web_company_name = 'pri' and wr1.wr_order_number is null limit 100; diff 
--git a/ql/src/test/queries/clientpositive/perf/query95.q b/ql/src/test/queries/clientpositive/perf/query95.q new file mode 100644 index 0000000..fbd2d47 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query95.q @@ -0,0 +1 @@ +explain SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk ) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri'; diff --git a/ql/src/test/queries/clientpositive/perf/query96.q b/ql/src/test/queries/clientpositive/perf/query96.q new file mode 100644 index 0000000..29265ed --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query96.q @@ -0,0 +1 @@ +explain select count(*) as c from store_sales ,household_demographics ,time_dim, store where store_sales.ss_sold_time_sk = time_dim.t_time_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_store_sk = store.s_store_sk and time_dim.t_hour = 8 and time_dim.t_minute >= 30 and household_demographics.hd_dep_count = 5 and store.s_store_name = 'ese' order by c limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query97.q b/ql/src/test/queries/clientpositive/perf/query97.q new file mode 100644 index 0000000..4995309 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query97.q @@ -0,0 +1 @@ +explain select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by ss_customer_sk ,ss_item_sk) ssci full outer join ( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by cs_bill_customer_sk ,cs_item_sk) csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100; diff --git a/ql/src/test/queries/clientpositive/perf/query98.q b/ql/src/test/queries/clientpositive/perf/query98.q new file mode 100644 index 0000000..5605d62 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query98.q @@ -0,0 +1 @@ +explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by 
i_class) as revenueratio from store_sales ,item ,date_dim where store_sales.ss_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_date between cast('2001-01-12' as date) and (cast('2001-02-11' as date)) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio; diff --git a/ql/src/test/results/clientpositive/perf/query12.q.out b/ql/src/test/results/clientpositive/perf/query12.q.out new file mode 100644 index 0000000..d6daebd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query12.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ws_ext_sales_price) as itemrevenue ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over (partition by i_class) as revenueratio from web_sales ,item ,date_dim where web_sales.ws_item_sk = item.i_item_sk and item.i_category in ('Jewelry', 'Sports', 'Books') and web_sales.ws_sold_date_sk = date_dim.d_date_sk and date_dim.d_date between '2001-01-12' and '2001-02-11' group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ws_ext_sales_price) as itemrevenue ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over (partition by i_class) as revenueratio from web_sales ,item ,date_dim where web_sales.ws_item_sk = item.i_item_sk and item.i_category in ('Jewelry', 'Sports', 'Books') and web_sales.ws_sold_date_sk = date_dim.d_date_sk and date_dim.d_date between '2001-01-12' and '2001-02-11' group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: 
decimal(7,2)), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN '2001-01-12' AND '2001-02-11' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col2 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col3 (type: string), _col2 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col0 (type: string), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col0 (type: string), _col6 (type: decimal(38,23)) + sort order: +++++ + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query13.q.out b/ql/src/test/results/clientpositive/perf/query13.q.out new file mode 100644 index 0000000..c9190fa --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query13.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) ,sum(ss_ext_wholesale_cost) from store_sales ,store ,customer_demographics ,household_demographics ,customer_address ,date_dim where store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_sold_date_sk = date_dim.d_date_sk and date_dim.d_year = 2001 and((store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M' and customer_demographics.cd_education_status = '4 yr Degree' and store_sales.ss_sales_price between 100.00 and 150.00 and household_demographics.hd_dep_count = 3 )or (store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Primary' and store_sales.ss_sales_price between 50.00 and 100.00 and household_demographics.hd_dep_count = 1 ) or (store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = ss_cdemo_sk and customer_demographics.cd_marital_status = 'U' and customer_demographics.cd_education_status = 'Advanced Degree' and store_sales.ss_sales_price between 150.00 and 200.00 and household_demographics.hd_dep_count = 1 )) and((store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('KY', 'GA', 'NM') and store_sales.ss_net_profit between 100 and 200 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('MT', 'OR', 'IN') and store_sales.ss_net_profit between 150 and 300 ) 
or (store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('WI', 'MO', 'WV') and store_sales.ss_net_profit between 50 and 250 )) +PREHOOK: type: QUERY +POSTHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) ,sum(ss_ext_wholesale_cost) from store_sales ,store ,customer_demographics ,household_demographics ,customer_address ,date_dim where store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_sold_date_sk = date_dim.d_date_sk and date_dim.d_year = 2001 and((store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'M' and customer_demographics.cd_education_status = '4 yr Degree' and store_sales.ss_sales_price between 100.00 and 150.00 and household_demographics.hd_dep_count = 3 )or (store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Primary' and store_sales.ss_sales_price between 50.00 and 100.00 and household_demographics.hd_dep_count = 1 ) or (store_sales.ss_hdemo_sk=household_demographics.hd_demo_sk and customer_demographics.cd_demo_sk = ss_cdemo_sk and customer_demographics.cd_marital_status = 'U' and customer_demographics.cd_education_status = 'Advanced Degree' and store_sales.ss_sales_price between 150.00 and 200.00 and household_demographics.hd_dep_count = 1 )) and((store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('KY', 'GA', 'NM') and store_sales.ss_net_profit between 100 and 200 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('MT', 'OR', 'IN') and store_sales.ss_net_profit between 150 and 300 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and customer_address.ca_country = 'United States' and customer_address.ca_state in ('WI', 'MO', 'WV') and store_sales.ss_net_profit between 50 and 250 )) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((((((ss_sales_price BETWEEN 100.0 AND 150.0 or ss_sales_price BETWEEN 50.0 AND 100.0 or ss_sales_price BETWEEN 150.0 AND 200.0) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250)) and ss_store_sk is not null) and ss_cdemo_sk is not null) and ss_hdemo_sk is not null) and ss_addr_sk is not null) and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), 
ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + TableScan + alias: customer_demographics + Statistics: Num rows: 67800 Data size: 7175859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and ((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree'))) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 33900 Data size: 3587929 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 33900 Data size: 3587929 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 33900 Data size: 3587929 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, 
_col2, _col3, _col5, _col6, _col7, _col8, _col9, _col12, _col13 + Statistics: Num rows: 37290 Data size: 3946721 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 37290 Data size: 3946721 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col12 (type: string), _col13 (type: string) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int), hd_dep_count (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col5, _col6, _col7, _col8, _col9, _col12, _col13, _col15 + Statistics: Num rows: 41019 Data size: 4341393 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 = 'M') and (_col13 = '4 yr Degree') and _col6 BETWEEN 100.0 AND 150.0 and (_col15 = 3)) or ((_col12 = 'D') and (_col13 = 'Primary') and _col6 BETWEEN 50.0 AND 100.0 and (_col15 = 1)) or ((_col12 = 'U') and (_col13 = 'Advanced Degree') and _col6 BETWEEN 150.0 AND 200.0 and (_col15 = 1))) (type: boolean) + Statistics: Num rows: 7689 Data size: 813792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + outputColumnNames: _col0, _col3, _col5, _col7, _col8, _col9 + Statistics: Num rows: 7689 Data size: 813792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 7689 Data size: 813792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data 
size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col5, _col7, _col8, _col9, _col17 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col17) IN ('KY', 'GA', 'NM') and _col9 BETWEEN 100 AND 200) or ((_col17) IN ('MT', 'OR', 'IN') and _col9 BETWEEN 150 AND 300) or ((_col17) IN ('WI', 'MO', 'WV') and _col9 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + outputColumnNames: _col0, _col5, _col7, _col8 + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col8 + Statistics: Num rows: 9075000 Data size: 9210035328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col5), avg(_col7), avg(_col8), sum(_col8) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 112 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query15.q.out b/ql/src/test/results/clientpositive/perf/query15.q.out new file mode 100644 index 0000000..d20e37e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query15.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: explain select ca_zip ,sum(cs_sales_price) from catalog_sales ,customer ,customer_address ,date_dim where catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') or customer_address.ca_state in ('CA','WA','GA') or catalog_sales.cs_sales_price > 500) and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_qoy = 2 and date_dim.d_year = 2000 group by ca_zip order by ca_zip limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select ca_zip ,sum(cs_sales_price) from catalog_sales ,customer ,customer_address ,date_dim where catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') or customer_address.ca_state in ('CA','WA','GA') or catalog_sales.cs_sales_price > 500) and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_qoy = 2 and date_dim.d_year = 2000 group by ca_zip order by ca_zip limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col6, _col7 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((substr(_col7, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or (_col6) IN ('CA', 'WA', 'GA') or (_col2 > 500)) (type: boolean) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col7 (type: 
string) + outputColumnNames: _col0, _col2, _col7 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col7 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 2) and (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col7, _col2 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory 
Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query17.q.out b/ql/src/test/results/clientpositive/perf/query17.q.out new file mode 100644 index 0000000..c7064b9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query17.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: explain select i_item_id ,i_item_desc ,s_state ,count(ss_quantity) as store_sales_quantitycount ,avg(ss_quantity) as store_sales_quantityave ,stddev_samp(ss_quantity) as store_sales_quantitystdev ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov ,count(sr_return_quantity) as_store_returns_quantitycount ,avg(sr_return_quantity) as_store_returns_quantityave ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_quarter_name = '2000Q1' and d1.d_date_sk = store_sales.ss_sold_date_sk and item.i_item_sk = store_sales.ss_item_sk and store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_customer_sk = store_returns.sr_customer_sk and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_returns.sr_returned_date_sk = d2.d_date_sk and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk and store_returns.sr_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_sold_date_sk = d3.d_date_sk and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') group by i_item_id ,i_item_desc ,s_state order by i_item_id ,i_item_desc ,s_state limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id ,i_item_desc ,s_state ,count(ss_quantity) as store_sales_quantitycount ,avg(ss_quantity) as store_sales_quantityave ,stddev_samp(ss_quantity) as store_sales_quantitystdev ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov ,count(sr_return_quantity) as_store_returns_quantitycount ,avg(sr_return_quantity) as_store_returns_quantityave ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev 
,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_quarter_name = '2000Q1' and d1.d_date_sk = store_sales.ss_sold_date_sk and item.i_item_sk = store_sales.ss_item_sk and store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_customer_sk = store_returns.sr_customer_sk and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_returns.sr_returned_date_sk = d2.d_date_sk and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk and store_returns.sr_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_sold_date_sk = d3.d_date_sk and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') group by i_item_id ,i_item_desc ,s_state order by i_item_id ,i_item_desc ,s_state limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-9 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_customer_sk is not null and ss_item_sk is not null) and ss_ticket_number is not null) and ss_sold_date_sk is not null) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((sr_customer_sk is not null and sr_item_sk is not null) and sr_ticket_number is not null) and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int) + Reduce Operator 
Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int), _col4 (type: int) + 1 _col2 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: int), _col7 (type: int) + sort order: ++ + Map-reduce partition columns: _col8 (type: int), _col7 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int) + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_bill_customer_sk is not null and cs_item_sk is not null) and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col8 (type: int), _col7 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col14 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col10 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 
to 1 + keys: + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col14 + Statistics: Num rows: 24305 Data size: 27199223 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 24305 Data size: 27199223 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col10 (type: int), _col14 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col10, _col14, _col22 + Statistics: Num rows: 26735 Data size: 29919145 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26735 Data size: 29919145 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col10 (type: int), _col14 (type: int), _col22 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col10, _col14, _col22, _col24, _col25 + Statistics: Num rows: 254100 Data size: 
364958258 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col22 (type: string), _col24 (type: string), _col25 (type: string), _col5 (type: int), _col10 (type: int), _col14 (type: int) + outputColumnNames: _col22, _col24, _col25, _col5, _col10, _col14 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col5), avg(_col5), stddev_samp(_col5), count(_col10), avg(_col10), stddev_samp(_col10), count(_col14), avg(_col14), stddev_samp(_col14) + keys: _col22 (type: string), _col24 (type: string), _col25 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: bigint), _col10 (type: struct), _col11 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), avg(VALUE._col1), stddev_samp(VALUE._col2), count(VALUE._col3), avg(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7), stddev_samp(VALUE._col8) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), (_col8 / _col7) (type: double), _col9 (type: bigint), _col10 (type: double), (_col11 / _col10) (type: double), _col0 (type: string), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), (_col5 / _col4) (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double) + outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 
(type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: bigint), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col10 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query18.q.out b/ql/src/test/results/clientpositive/perf/query18.q.out new file mode 100644 index 0000000..73dfd3a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query18.q.out @@ -0,0 +1,346 @@ +PREHOOK: query: explain select i_item_id, ca_country, ca_state, ca_county, avg( cast(cs_quantity as decimal(12,2))) agg1, avg( cast(cs_list_price as decimal(12,2))) agg2, avg( cast(cs_coupon_amt as decimal(12,2))) agg3, avg( cast(cs_sales_price as decimal(12,2))) agg4, avg( cast(cs_net_profit as decimal(12,2))) agg5, avg( cast(c_birth_year as decimal(12,2))) agg6, avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 from catalog_sales, date_dim, customer_demographics cd1, item, customer, customer_address, customer_demographics cd2 where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and cd1.cd_gender = 'M' and cd1.cd_education_status = 'College' and customer.c_current_cdemo_sk = cd2.cd_demo_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and c_birth_month in (9,5,12,4,1,10) and d_year = 2001 and ca_state in ('ND','WI','AL' ,'NC','OK','MS','TN') group by i_item_id, ca_country, ca_state, ca_county with rollup order by ca_country, ca_state, ca_county, i_item_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id, ca_country, ca_state, ca_county, avg( cast(cs_quantity as decimal(12,2))) agg1, avg( cast(cs_list_price as decimal(12,2))) agg2, avg( cast(cs_coupon_amt as decimal(12,2))) agg3, avg( cast(cs_sales_price as decimal(12,2))) agg4, avg( cast(cs_net_profit as decimal(12,2))) agg5, avg( cast(c_birth_year as decimal(12,2))) agg6, avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 from catalog_sales, date_dim, customer_demographics cd1, item, customer, customer_address, customer_demographics cd2 where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk and 
catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and cd1.cd_gender = 'M' and cd1.cd_education_status = 'College' and customer.c_current_cdemo_sk = cd2.cd_demo_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and c_birth_month in (9,5,12,4,1,10) and d_year = 2001 and ca_state in ('ND','WI','AL' ,'NC','OK','MS','TN') group by i_item_id, ca_country, ca_state, ca_county with rollup order by ca_country, ca_state, ca_county, i_item_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null) and cs_item_sk is not null) and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort 
order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: cd1 + Statistics: Num rows: 139800 Data size: 71713059 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((cd_gender = 'M') and (cd_education_status = 'College')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 17475 Data size: 8964132 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_dep_count (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 17475 Data size: 8964132 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17475 Data size: 8964132 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col14 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8, _col14, _col16 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + 
Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int), _col16 (type: string) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null) and c_current_addr_sk is not null) and c_current_cdemo_sk is not null) (type: boolean) + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col14, _col16, _col18, _col19, _col21 + Statistics: Num rows: 5500000 Data size: 4730111191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col19 (type: int) + sort order: + + Map-reduce partition columns: _col19 (type: int) + Statistics: Num rows: 5500000 Data size: 4730111191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int), _col16 (type: string), _col18 (type: int), _col21 (type: int) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 
1 + keys: + 0 _col19 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col14, _col16, _col18, _col21, _col23, _col24, _col25 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col18 (type: int) + sort order: + + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int), _col16 (type: string), _col21 (type: int), _col23 (type: string), _col24 (type: string), _col25 (type: string) + TableScan + alias: cd1 + Statistics: Num rows: 139800 Data size: 71713059 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 69900 Data size: 35856529 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 69900 Data size: 35856529 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 69900 Data size: 35856529 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col18 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col14, _col16, _col21, _col23, _col24, _col25 + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col16 (type: string), _col25 (type: string), _col24 (type: string), _col23 (type: string), CAST( _col4 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col5 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col7 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col6 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col8 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col21 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col14 AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col5), avg(_col6), avg(_col7), avg(_col8), avg(_col9), avg(_col10) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 60500000 Data size: 61400235525 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + 
TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 60500000 Data size: 61400235525 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3), avg(VALUE._col4), avg(VALUE._col5), avg(VALUE._col6) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 30250000 Data size: 30700117762 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)), _col11 (type: decimal(16,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 30250000 Data size: 30700117762 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col0 (type: string) + sort order: ++++ + Statistics: Num rows: 30250000 Data size: 30700117762 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(16,6)), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(16,6)), VALUE._col1 (type: decimal(16,6)), VALUE._col2 (type: decimal(16,6)), VALUE._col3 (type: decimal(16,6)), VALUE._col4 (type: decimal(16,6)), VALUE._col5 (type: decimal(16,6)), VALUE._col6 (type: decimal(16,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 30250000 Data size: 30700117762 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query19.q.out b/ql/src/test/results/clientpositive/perf/query19.q.out new file mode 100644 index 0000000..6a43aad --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query19.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=7 and d_moy=11 and d_year=1999 and store_sales.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and substr(ca_zip,1,5) <> substr(s_zip,1,5) and store_sales.ss_store_sk = store.s_store_sk group by i_brand ,i_brand_id ,i_manufact_id ,i_manufact order by ext_price desc ,i_brand ,i_brand_id ,i_manufact_id ,i_manufact limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=7 and d_moy=11 and d_year=1999 and store_sales.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = customer_address.ca_address_sk and substr(ca_zip,1,5) <> substr(s_zip,1,5) and store_sales.ss_store_sk = store.s_store_sk group by i_brand ,i_brand_id ,i_manufact_id ,i_manufact order by ext_price desc ,i_brand ,i_brand_id ,i_manufact_id ,i_manufact limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 11) and (d_year = 1999)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_sold_date_sk is not null and ss_item_sk is not null) and ss_customer_sk is not null) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 7) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string), i_manufact_id (type: int), i_manufact (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col7, _col9, _col10, _col11, _col12 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), 
c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col6, _col7, _col9, _col10, _col11, _col12, _col15 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col15 (type: int) + sort order: + + Map-reduce partition columns: _col15 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_zip (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col15 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col6, _col7, _col9, _col10, _col11, _col12, _col17 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string), _col17 (type: string) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_zip 
(type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col7, _col9, _col10, _col11, _col12, _col17, _col19 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (substr(_col17, 1, 5) <> substr(_col19, 1, 5)) (type: boolean) + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col9, _col10, _col11, _col12, _col7 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col7) + keys: _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: decimal(17,2)), _col1 (type: string), _col0 (type: int), _col2 (type: int), _col3 (type: string) + sort order: -++++ + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13310000 Data size: 11446869580 Basic stats: 
COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query20.q.out b/ql/src/test/results/clientpositive/perf/query20.q.out new file mode 100644 index 0000000..9b873f8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query20.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(cs_ext_sales_price) as itemrevenue ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over (partition by i_class) as revenueratio from catalog_sales ,item ,date_dim where catalog_sales.cs_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and d_date between '2001-01-12' and '2001-02-11' group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(cs_ext_sales_price) as itemrevenue ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over (partition by i_class) as revenueratio from catalog_sales ,item ,date_dim where catalog_sales.cs_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and d_date between '2001-01-12' and '2001-02-11' group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN '2001-01-12' AND '2001-02-11' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col2 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col3 (type: string), _col2 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col0 (type: string), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col0 (type: string), _col6 (type: decimal(38,23)) + sort order: +++++ + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query22.q.out b/ql/src/test/results/clientpositive/perf/query22.q.out new file mode 100644 index 0000000..1e2f29c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query22.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_date_sk is not null and inv_item_sk is not null) and inv_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + 
Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string) + TableScan + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: w_warehouse_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col3 (type: int) + outputColumnNames: _col7, _col8, _col9, _col10, _col3 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col3) + keys: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1397550 Data size: 2007270460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 1397550 Data size: 2007270460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 698775 Data size: 1003635230 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 698775 Data size: 1003635230 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: double), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: +++++ + Statistics: Num rows: 698775 Data size: 1003635230 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 698775 Data size: 1003635230 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query25.q.out b/ql/src/test/results/clientpositive/perf/query25.q.out new file mode 100644 index 0000000..9ce0fac --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query25.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_net_profit) as store_sales_profit ,sum(sr_net_loss) as store_returns_loss ,sum(cs_net_profit) as catalog_sales_profit from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 4 and d1.d_year = 1998 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 4 and 10 and d2.d_year = 1998 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_moy between 4 and 10 and d3.d_year = 1998 group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc ,s_store_id ,s_store_name limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_net_profit) as store_sales_profit ,sum(sr_net_loss) as store_returns_loss ,sum(cs_net_profit) as catalog_sales_profit from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 4 and d1.d_year = 1998 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 4 and 10 and d2.d_year = 1998 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_moy between 4 and 10 and d3.d_year = 1998 group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc 
,s_store_id ,s_store_name limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-9 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_customer_sk is not null and ss_item_sk is not null) and ss_ticket_number is not null) and ss_sold_date_sk is not null) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((sr_customer_sk is not null and sr_item_sk is not null) and sr_ticket_number is not null) and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int), _col4 (type: int) + 1 _col2 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: int), _col7 (type: int) + sort order: ++ + Map-reduce partition columns: _col8 (type: int), _col7 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: int), _col10 (type: decimal(7,2)) + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_bill_customer_sk is not null and cs_item_sk is not null) and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col8 (type: int), _col7 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: int), _col10 (type: decimal(7,2)), _col11 (type: int), _col14 (type: decimal(7,2)) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 4) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + 
Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: int), _col14 (type: decimal(7,2)) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy BETWEEN 4 AND 10 and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col14 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy BETWEEN 4 AND 10 and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col14 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: int), _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col10, _col14, _col25, _col26 + Statistics: Num rows: 13367 Data size: 14959572 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13367 Data size: 14959572 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col25 (type: string), _col26 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col10, _col14, _col25, _col26, _col28, _col29 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string), _col5 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + outputColumnNames: _col25, _col26, _col28, _col29, _col5, _col10, _col14 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col10), sum(_col14) + keys: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query26.q.out b/ql/src/test/results/clientpositive/perf/query26.q.out new file 
mode 100644 index 0000000..6c1abbd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query26.q.out @@ -0,0 +1,254 @@ +PREHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, avg(cs_coupon_amt) agg3, avg(cs_sales_price) agg4 from catalog_sales, customer_demographics, date_dim, item, promotion where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk and catalog_sales.cs_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, avg(cs_coupon_amt) agg3, avg(cs_sales_price) agg4 from catalog_sales, customer_demographics, date_dim, item, promotion where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_item_sk = item.i_item_sk and catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk and catalog_sales.cs_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null) and cs_item_sk is not null) and cs_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: customer_demographics + Statistics: Num rows: 259800 Data size: 71725059 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((cd_gender = 'F') and (cd_marital_status = 'W')) and (cd_education_status = 'Primary')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 16238 Data size: 4482954 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 16238 Data size: 4482954 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 16238 Data size: 4482954 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 17861 Data size: 4931249 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17861 Data size: 4931249 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col15 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col15 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col15, _col4, _col5, _col7, _col6 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6) + keys: _col15 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 139755 Data size: 200727046 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query27.q.out b/ql/src/test/results/clientpositive/perf/query27.q.out new file mode 100644 index 0000000..ec53006 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query27.q.out @@ -0,0 +1,259 @@ +PREHOOK: query: explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 
depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_cdemo_sk is not null and ss_sold_date_sk is not null) and ss_store_sk is not null) and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: customer_demographics + Statistics: Num rows: 283800 Data size: 71727459 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((cd_gender = 'F') and (cd_marital_status = 'D')) and (cd_education_status = 'Unknown')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 17738 Data size: 4483092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17738 Data size: 4483092 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17738 Data size: 4483092 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 19511 Data size: 4931401 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 19511 Data size: 4931401 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 21462 Data size: 5424541 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 21462 Data size: 5424541 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15 + Statistics: Num rows: 23608 Data size: 5966995 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 23608 Data size: 5966995 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col17 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col15, _col17, _col4, _col5, _col7, _col6 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6) + keys: _col15 (type: string), _col17 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: double), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column 
stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query29.q.out b/ql/src/test/results/clientpositive/perf/query29.q.out new file mode 100644 index 0000000..87a2bb8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query29.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_quantity) as store_sales_quantity ,sum(sr_return_quantity) as store_returns_quantity ,sum(cs_quantity) as catalog_sales_quantity from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 2 and d1.d_year = 2000 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 2 and 2 + 3 and d2.d_year = 2000 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_year in (2000,2000+1,2000+2) group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc ,s_store_id ,s_store_name limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id ,i_item_desc ,s_store_id ,s_store_name ,sum(ss_quantity) as store_sales_quantity ,sum(sr_return_quantity) as store_returns_quantity ,sum(cs_quantity) as catalog_sales_quantity from store_sales ,store_returns ,catalog_sales ,date_dim d1 ,date_dim d2 ,date_dim d3 ,store ,item where d1.d_moy = 2 and d1.d_year = 2000 and d1.d_date_sk = ss_sold_date_sk and i_item_sk = ss_item_sk and s_store_sk = ss_store_sk and ss_customer_sk = sr_customer_sk and ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number and sr_returned_date_sk = d2.d_date_sk and d2.d_moy between 2 and 2 + 3 and d2.d_year = 2000 and sr_customer_sk = cs_bill_customer_sk and sr_item_sk = cs_item_sk and cs_sold_date_sk = d3.d_date_sk and d3.d_year in (2000,2000+1,2000+2) group by i_item_id ,i_item_desc ,s_store_id ,s_store_name order by i_item_id ,i_item_desc ,s_store_id ,s_store_name limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-9 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_customer_sk is not null and ss_item_sk is not null) and ss_ticket_number is not null) and ss_sold_date_sk is not null) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 
ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: int) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((sr_customer_sk is not null and sr_item_sk is not null) and sr_ticket_number is not null) and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int), _col4 (type: int) + 1 _col2 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: int), _col7 (type: int) + sort order: ++ + Map-reduce partition columns: _col8 (type: int), _col7 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int) + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_bill_customer_sk is not null and cs_item_sk is not null) and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 
Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col8 (type: int), _col7 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 2) and (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col10, _col11, _col14 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy BETWEEN 2 AND 5 and (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col14 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col10 (type: int), _col14 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col14 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col10 (type: int), _col14 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col10, _col14, _col24, _col25 + Statistics: Num rows: 22096 Data 
size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col10 (type: int), _col14 (type: int), _col24 (type: string), _col25 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col10, _col14, _col24, _col25, _col27, _col28 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col24 (type: string), _col25 (type: string), _col27 (type: string), _col28 (type: string), _col5 (type: int), _col10 (type: int), _col14 (type: int) + outputColumnNames: _col24, _col25, _col27, _col28, _col5, _col10, _col14 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col10), sum(_col14) + keys: _col24 (type: string), _col25 (type: string), _col27 (type: string), _col28 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query3.q.out b/ql/src/test/results/clientpositive/perf/query3.q.out new file mode 100644 index 0000000..20954c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query3.q.out @@ -0,0 +1,171 @@ +PREHOOK: query: explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) sum_agg from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 436 and dt.d_moy=12 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,sum_agg desc ,brand_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) sum_agg from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 436 and dt.d_moy=12 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,sum_agg desc ,brand_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dt + 
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 12) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manufact_id = 436) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: 
int), _col7 (type: int), _col8 (type: string), _col5 (type: decimal(7,2)) + outputColumnNames: _col1, _col7, _col8, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col1 (type: int), _col7 (type: int), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: decimal(17,2)), _col1 (type: int) + sort order: +-+ + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query31.q.out b/ql/src/test/results/clientpositive/perf/query31.q.out new file mode 100644 index 0000000..c3f36fd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query31.q.out @@ -0,0 +1,944 @@ +PREHOOK: query: explain with ss as (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales from store_sales,date_dim,customer_address where ss_sold_date_sk = d_date_sk and ss_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year), ws as (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales from 
web_sales,date_dim,customer_address where ws_sold_date_sk = d_date_sk and ws_bill_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year) select ss1.ca_county ,ss1.d_year ,ws2.web_sales/ws1.web_sales web_q1_q2_increase ,ss2.store_sales/ss1.store_sales store_q1_q2_increase ,ws3.web_sales/ws2.web_sales web_q2_q3_increase ,ss3.store_sales/ss2.store_sales store_q2_q3_increase from ss ss1 ,ss ss2 ,ss ss3 ,ws ws1 ,ws ws2 ,ws ws3 where ss1.d_qoy = 1 and ss1.d_year = 1998 and ss1.ca_county = ss2.ca_county and ss2.d_qoy = 2 and ss2.d_year = 1998 and ss2.ca_county = ss3.ca_county and ss3.d_qoy = 3 and ss3.d_year = 1998 and ss1.ca_county = ws1.ca_county and ws1.d_qoy = 1 and ws1.d_year = 1998 and ws1.ca_county = ws2.ca_county and ws2.d_qoy = 2 and ws2.d_year = 1998 and ws1.ca_county = ws3.ca_county and ws3.d_qoy = 3 and ws3.d_year =1998 and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end order by web_q1_q2_increase +PREHOOK: type: QUERY +POSTHOOK: query: explain with ss as (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales from store_sales,date_dim,customer_address where ss_sold_date_sk = d_date_sk and ss_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year), ws as (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales from web_sales,date_dim,customer_address where ws_sold_date_sk = d_date_sk and ws_bill_addr_sk=ca_address_sk group by ca_county,d_qoy, d_year) select ss1.ca_county ,ss1.d_year ,ws2.web_sales/ws1.web_sales web_q1_q2_increase ,ss2.store_sales/ss1.store_sales store_q1_q2_increase ,ws3.web_sales/ws2.web_sales web_q2_q3_increase ,ss3.store_sales/ss2.store_sales store_q2_q3_increase from ss ss1 ,ss ss2 ,ss ss3 ,ws ws1 ,ws ws2 ,ws ws3 where ss1.d_qoy = 1 and ss1.d_year = 1998 and ss1.ca_county = ss2.ca_county and ss2.d_qoy = 2 and ss2.d_year = 1998 and ss2.ca_county = ss3.ca_county and ss3.d_qoy = 3 and ss3.d_year = 1998 and ss1.ca_county = ws1.ca_county and ws1.d_qoy = 1 and ws1.d_year = 1998 and ws1.ca_county = ws2.ca_county and ws2.d_qoy = 2 and ws2.d_year = 1998 and ws1.ca_county = ws3.ca_county and ws3.d_qoy = 3 and ws3.d_year =1998 and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end order by web_q1_q2_increase +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-11, Stage-16, Stage-21, Stage-26 + Stage-5 depends on stages: Stage-4, Stage-31 + Stage-6 depends on stages: Stage-5 + Stage-9 is a root stage + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-14 is a root stage + Stage-15 depends on stages: Stage-14 + Stage-16 depends on stages: Stage-15 + Stage-19 is a root stage + Stage-20 depends on stages: Stage-19 + Stage-21 depends on stages: Stage-20 + Stage-24 is a root stage + Stage-25 depends on stages: Stage-24 + Stage-26 depends on stages: Stage-25 + Stage-29 is a root stage + Stage-30 depends on stages: Stage-29 + Stage-31 depends on stages: 
Stage-30 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 1) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: 1998 (type: int), 1 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col0 (type: int), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: 
decimal(17,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + Inner Join 3 to 4 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + 3 _col0 (type: string) + 4 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col7, _col11, _col12, _col15, _col19 + Statistics: Num rows: 24200000 Data size: 24560094209 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CASE WHEN ((_col15 > 0)) THEN ((_col19 / _col15)) ELSE (null) END > CASE WHEN ((_col3 > 0)) THEN ((_col7 / _col3)) ELSE (null) END) (type: boolean) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: decimal(17,2)), _col12 (type: string), _col15 (type: decimal(17,2)), _col19 (type: decimal(17,2)), _col2 (type: int), _col3 (type: decimal(17,2)), _col7 (type: decimal(17,2)) + outputColumnNames: _col0, _col11, _col12, _col15, _col19, _col2, _col3, _col7 + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col12 (type: string) + sort order: + + Map-reduce partition columns: _col12 (type: string) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col15 (type: decimal(17,2)), _col19 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col7, _col11, _col15, _col19, _col23 + Statistics: Num rows: 8873332 Data size: 9005367327 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CASE WHEN ((_col19 > 0)) THEN ((_col23 / _col19)) ELSE (null) END > CASE WHEN ((_col7 > 0)) THEN ((_col11 / _col7)) ELSE (null) END) (type: boolean) + Statistics: Num rows: 2957777 Data size: 3001788770 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: int), (_col19 / _col15) (type: decimal(37,20)), (_col7 / _col3) (type: decimal(37,20)), (_col23 / _col19) (type: decimal(37,20)), (_col11 / _col7) (type: decimal(37,20)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2957777 Data size: 3001788770 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key 
expressions: _col2 (type: decimal(37,20)) + sort order: + + Statistics: Num rows: 2957777 Data size: 3001788770 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col3 (type: decimal(37,20)), _col4 (type: decimal(37,20)), _col5 (type: decimal(37,20)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: decimal(37,20)), VALUE._col2 (type: decimal(37,20)), VALUE._col3 (type: decimal(37,20)), VALUE._col4 (type: decimal(37,20)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2957777 Data size: 3001788770 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2957777 Data size: 3001788770 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 2) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 
Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), 2 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 
Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 3) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), 3 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 1) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-20 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), 1 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-21 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-24 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter 
Operator + predicate: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 2) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-25 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), 2 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-26 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-29 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 3) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num 
rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-30 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), 3 (type: int), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col7, _col2 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: int), _col5 (type: int), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-31 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: decimal(17,2)) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5500000 Data size: 5581839472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query34.q.out b/ql/src/test/results/clientpositive/perf/query34.q.out new file mode 100644 index 0000000..7867e3e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query34.q.out @@ -0,0 +1,257 @@ +PREHOOK: query: explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) and (household_demographics.hd_buy_potential = '1001-5000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and (case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end) > 1.2 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County', 'Orange County','Appanoose County','Franklin Parish','Tehama County') group by ss_ticket_number,ss_customer_sk) dn,customer where dn.ss_customer_sk = customer.c_customer_sk and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) and (household_demographics.hd_buy_potential = '1001-5000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and (case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end) > 1.2 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County', 'Orange County','Appanoose County','Franklin Parish','Tehama County') group by ss_ticket_number,ss_customer_sk) dn,customer where dn.ss_customer_sk = customer.c_customer_sk and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_hdemo_sk is not 
null) and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_county) IN ('Kittitas County', 'Adams County', 'Richland County', 'Furnas County', 'Orange County', 'Appanoose County', 'Franklin Parish', 'Tehama County') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((hd_buy_potential = '1001-5000') or (hd_buy_potential = '5001-10000')) and (hd_vehicle_count > 0)) and (CASE WHEN ((hd_vehicle_count > 0)) THEN ((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count))) ELSE (null) END > 1.2)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 24305 Data size: 27199223 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24305 Data size: 27199223 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 24305 Data size: 27199223 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12152 Data size: 13599051 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 BETWEEN 15 AND 20 (type: boolean) + Statistics: Num rows: 6076 Data size: 6799525 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6076 Data size: 6799525 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce 
Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6076 Data size: 6799525 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: bigint) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col5 (type: string), _col4 (type: string), _col7 (type: string), _col0 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: +++- + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query39.q.out b/ql/src/test/results/clientpositive/perf/query39.q.out new file mode 100644 index 0000000..dad0944 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query39.q.out @@ -0,0 
+1,453 @@ +PREHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +POSTHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4, Stage-13 + Stage-6 depends on stages: Stage-5 + Stage-10 is a root stage + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11 + Stage-13 depends on stages: Stage-12 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_item_sk is not null and inv_warehouse_sk is not null) and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data 
size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: int) + TableScan + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5, _col6 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_year = 1999) and d_date_sk is not null) and (d_moy = 3)) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: int), _col6 (type: string), 3 (type: int), _col3 (type: int) + outputColumnNames: _col4, _col5, _col6, _col9, _col3 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: stddev_samp(_col3), avg(_col3) + keys: _col4 (type: int), _col5 (type: int), _col6 (type: string), _col9 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 153730 Data size: 220799036 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 153730 Data size: 220799036 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1.0) (type: boolean) + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: double), _col5 (type: double) + 
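The Stage-4 reducer shown just above is where query39's coefficient-of-variation test happens: the partial stddev_samp and avg results are merged per (warehouse, item, month) group, a CASE guards against a zero mean, and only groups whose stdev/mean exceeds 1 survive the filter. A minimal standalone sketch of that calculation, using an assumed toy table t(grp, qty) rather than the benchmark schema:

  -- Hypothetical illustration of the stdev/mean ("cov") pattern; t(grp, qty) is assumed.
  SELECT grp,
         stddev_samp(qty) AS stdev,
         avg(qty)         AS mean,
         CASE avg(qty) WHEN 0 THEN NULL
              ELSE stddev_samp(qty) / avg(qty) END AS cov
  FROM   t
  GROUP  BY grp
  HAVING CASE avg(qty) WHEN 0 THEN 0
              ELSE stddev_samp(qty) / avg(qty) END > 1.0;

The plan evaluates the expression twice for the same reason the SQL does: the filter maps a zero mean to 0 so the group is dropped, while the projected cov maps it to NULL.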
TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: double), _col5 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col2 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col7, _col8, _col10, _col11 + Statistics: Num rows: 56367 Data size: 80959120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), _col5 (type: double), _col7 (type: int), _col8 (type: int), _col10 (type: double), _col11 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col8, _col9 + Statistics: Num rows: 56367 Data size: 80959120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), 3 (type: int), _col3 (type: double), _col4 (type: double), 4 (type: int), _col8 (type: double), _col9 (type: double) + sort order: ++++++++ + Statistics: Num rows: 56367 Data size: 80959120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), 3 (type: int), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), 4 (type: int), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 56367 Data size: 80959120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56367 Data size: 80959120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_item_sk is not null and inv_warehouse_sk is not null) and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + TableScan + alias: item + 
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: int) + TableScan + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5, _col6 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_year = 1999) and d_date_sk is not null) and (d_moy = 4)) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: int), _col6 (type: string), 4 (type: int), _col3 (type: int) + outputColumnNames: _col4, _col5, _col6, _col9, _col3 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: stddev_samp(_col3), avg(_col3) + keys: _col4 (type: int), _col5 (type: int), _col6 (type: string), _col9 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 153730 Data size: 220799036 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 153730 Data size: 220799036 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1.0) (type: boolean) + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) + outputColumnNames: _col1, _col2, _col4, _col5 + Statistics: Num rows: 51243 Data size: 73599199 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case 
mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 and inv1.cov > 1.5 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +#### A masked pattern was here #### +POSTHOOK: query: with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean from inventory ,item ,warehouse ,date_dim where inv_item_sk = i_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk and d_year =1999 group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo where case mean when 0 then 0 else stdev/mean end > 1) select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov from inv inv1,inv inv2 where inv1.i_item_sk = inv2.i_item_sk and inv1.w_warehouse_sk = inv2.w_warehouse_sk and inv1.d_moy=3 and inv2.d_moy=3+1 and inv1.cov > 1.5 order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/perf/query40.q.out b/ql/src/test/results/clientpositive/perf/query40.q.out new file mode 100644 index 0000000..fe0692a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query40.q.out @@ -0,0 +1,257 @@ +PREHOOK: query: explain select w_state ,i_item_id ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after from catalog_sales left outer join catalog_returns on (catalog_sales.cs_order_number = catalog_returns.cr_order_number and catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) ,warehouse ,item ,date_dim where i_current_price between 0.99 and 1.49 and item.i_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_date between '1998-03-09' and '1998-05-08' group by w_state,i_item_id order by w_state,i_item_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select w_state ,i_item_id ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) 
as sales_before ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after from catalog_sales left outer join catalog_returns on (catalog_sales.cs_order_number = catalog_returns.cr_order_number and catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) ,warehouse ,item ,date_dim where i_current_price between 0.99 and 1.49 and item.i_item_sk = catalog_sales.cs_item_sk and catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk and catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and date_dim.d_date between '1998-03-09' and '1998-05-08' group by w_state,i_item_id order by w_state,i_item_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_warehouse_sk is not null and cs_item_sk is not null) and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_warehouse_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col3 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_refunded_cash (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col3 (type: int), _col2 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce 
partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: w_warehouse_sk (type: int), w_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col7, _col9 + Statistics: Num rows: 15 Data size: 15856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 15 Data size: 15856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col7, _col9, _col11 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), 
_col7 (type: decimal(7,2)), _col9 (type: string), _col11 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN '1998-03-09' AND '1998-05-08' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col7, _col9, _col11, _col14 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col14 AS DATE) < 1998-04-08)) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col14 AS DATE) >= 1998-04-08)) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(23,2)), _col3 (type: decimal(23,2)) + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(23,2)), VALUE._col1 (type: decimal(23,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query42.q.out b/ql/src/test/results/clientpositive/perf/query42.q.out new file mode 100644 index 0000000..3d90de7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query42.q.out @@ -0,0 +1,169 @@ +PREHOOK: query: explain select dt.d_year ,item.i_category_id ,item.i_category ,sum(ss_ext_sales_price) as s from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_category_id ,item.i_category order by s desc,dt.d_year ,item.i_category_id ,item.i_category limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select dt.d_year ,item.i_category_id ,item.i_category ,sum(ss_ext_sales_price) as s from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_category_id ,item.i_category order by s desc,dt.d_year ,item.i_category_id ,item.i_category limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dt + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + 
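Stepping back to the query40 plan above: its sales_before and sales_after columns are plain conditional aggregation, each SUM wrapping a CASE that zeroes out rows on the wrong side of the 1998-04-08 pivot date (the real query also subtracts the coalesced refund amount first). A schematic sketch of the pattern, assuming a toy table s(sale_date, amount) rather than catalog_sales:

  -- Hypothetical sketch of the before/after split; s(sale_date, amount) is assumed.
  SELECT SUM(CASE WHEN CAST(sale_date AS DATE) <  CAST('1998-04-08' AS DATE)
                  THEN amount ELSE 0 END) AS sales_before,
         SUM(CASE WHEN CAST(sale_date AS DATE) >= CAST('1998-04-08' AS DATE)
                  THEN amount ELSE 0 END) AS sales_after
  FROM   s;

In the plan this appears as the pair of CASE WHEN ... THEN (_col4 - COALESCE(_col7,0)) ELSE 0 END expressions feeding the two sum() aggregations in Stage-4, followed by the usual hash then mergepartial group-by split.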
+ Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_category_id (type: int), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), _col7 (type: int), _col8 (type: string), _col5 (type: decimal(7,2)) + outputColumnNames: _col1, _col7, _col8, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col1 (type: int), _col7 (type: int), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: decimal(17,2)), _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: -+++ + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query43.q.out b/ql/src/test/results/clientpositive/perf/query43.q.out new file mode 100644 index 0000000..8b44938 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query43.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: explain select s_store_name, s_store_id, sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales from date_dim, store_sales, store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk and s_gmt_offset = -6 and d_year = 1998 group by s_store_name, s_store_id order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select s_store_name, s_store_id, sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales from date_dim, store_sales, store where date_dim.d_date_sk = store_sales.ss_sold_date_sk and 
store.s_store_sk = store_sales.ss_store_sk and s_gmt_offset = -6 and d_year = 1998 group by s_store_name, s_store_id order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_day_name (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col5 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_gmt_offset = -6) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col5, _col7, _col8 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col7 (type: string), CASE WHEN ((_col2 = 'Sunday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Monday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Tuesday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Wednesday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Thursday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Friday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)), CASE WHEN ((_col2 = 'Saturday')) THEN (_col5) ELSE (null) END (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 
(type: decimal(17,2)) + sort order: +++++++++ + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: decimal(17,2)), KEY.reducesinkkey6 (type: decimal(17,2)), KEY.reducesinkkey7 (type: decimal(17,2)), KEY.reducesinkkey8 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query45.q.out b/ql/src/test/results/clientpositive/perf/query45.q.out new file mode 100644 index 0000000..e89666e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query45.q.out @@ -0,0 +1,306 @@ +PREHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN item ON web_sales.ws_item_sk = item.i_item_sk where ( item.i_item_id in (select i_item_id from item i2 where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) ) ) and d_qoy = 2 and d_year = 2000 group by ca_zip, ca_county order by ca_zip, ca_county limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN item ON web_sales.ws_item_sk = item.i_item_sk where ( item.i_item_id in (select i_item_id from item i2 where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) ) ) and d_qoy = 2 and d_year = 2000 group by ca_zip, ca_county order by ca_zip, ca_county limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-10 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-10 is a root stage + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_bill_customer_sk is not null and ws_sold_date_sk is not null) and ws_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk 
(type: int), ws_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col7, _col8 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_qoy = 2) and (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col7, _col8 + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26620000 Data size: 22893739161 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col7, _col8 + Statistics: Num rows: 29282000 Data size: 25183113622 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col8 (type: string), _col3 (type: decimal(7,2)) + outputColumnNames: _col7, _col8, _col3 + Statistics: Num rows: 29282000 Data size: 25183113622 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col7 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29282000 Data size: 25183113622 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string) + Statistics: Num rows: 29282000 Data size: 25183113622 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14641000 Data size: 12591556811 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14641000 Data size: 12591556811 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 14641000 Data size: 12591556811 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14641000 Data size: 12591556811 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_id (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 115500 Data size: 165890114 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query46.q.out b/ql/src/test/results/clientpositive/perf/query46.q.out new file mode 100644 index 0000000..1cc1bd5 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query46.q.out @@ -0,0 +1,351 @@ +PREHOOK: query: explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,amt,profit from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_dow in (6,0) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem','Clinton','Clifton','Springfield') group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,amt,profit from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_dow in (6,0) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem','Clinton','Clifton','Springfield') group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on 
stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_hdemo_sk is not null) and ss_addr_sk is not null) and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_coupon_amt (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_city) IN ('Rosedale', 'Bethlehem', 'Clinton', 'Clifton', 'Springfield') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hd_dep_count = 4) or (hd_vehicle_count = 2)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col17 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col17 (type: string), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + outputColumnNames: _col1, _col3, _col5, _col17, _col6, _col7 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6), sum(_col7) + keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col17 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col6, _col7, _col8 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: string), _col8 (type: string) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col7, _col8, _col10 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col10 <> _col2) (type: boolean) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col7 (type: string), _col10 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + 
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + sort order: +++++ + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query48.q.out b/ql/src/test/results/clientpositive/perf/query48.q.out new file mode 100644 index 0000000..57f6489 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query48.q.out @@ -0,0 +1,227 @@ +PREHOOK: query: explain select sum (ss_quantity) from store_sales, store, customer_demographics, customer_address, date_dim where store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and ( ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 100.00 and 150.00 ) or ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 50.00 and 100.00 ) or ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 150.00 and 200.00 ) ) and ( ( store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ss_net_profit between 0 and 2000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ss_net_profit between 150 and 3000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ss_net_profit between 50 and 25000 ) ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum (ss_quantity) from store_sales, store, customer_demographics, customer_address, date_dim where store.s_store_sk = store_sales.ss_store_sk and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and ( ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 100.00 and 150.00 ) or ( customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 50.00 and 100.00 ) or ( 
customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk and cd_marital_status = 'M' and cd_education_status = '4 yr Degree' and ss_sales_price between 150.00 and 200.00 ) ) and ( ( store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ss_net_profit between 0 and 2000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ss_net_profit between 150 and 3000 ) or (store_sales.ss_addr_sk = customer_address.ca_address_sk and ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ss_net_profit between 50 and 25000 ) ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((((ss_sales_price BETWEEN 100.0 AND 150.0 or ss_sales_price BETWEEN 50.0 AND 100.0 or ss_sales_price BETWEEN 150.0 AND 200.0) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000)) and ss_store_sk is not null) and ss_cdemo_sk is not null) and ss_addr_sk is not null) and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col6 + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + TableScan + alias: customer_demographics + Statistics: Num rows: 547800 Data size: 71753859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 68475 Data size: 8969232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 68475 Data size: 8969232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 68475 Data size: 8969232 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col6 + Statistics: Num rows: 75322 Data size: 9866155 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 75322 Data size: 9866155 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col6, _col12 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 0 AND 2000) or ((_col12) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 3000) or ((_col12) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 25000)) (type: boolean) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 
(type: int), _col4 (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4 + Statistics: Num rows: 9075000 Data size: 9210035328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query50.q.out b/ql/src/test/results/clientpositive/perf/query50.q.out new file mode 100644 index 0000000..7ad68e0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query50.q.out @@ -0,0 +1,255 @@ +PREHOOK: query: explain select s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as 30days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and (sr_returned_date_sk - 
ss_sold_date_sk <= 60) then 1 else 0 end ) as 3160days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as 6190days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as 91120days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as 120days from store_sales ,store_returns ,store ,date_dim d1 ,date_dim d2 where d2.d_year = 2000 and d2.d_moy = 9 and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_sold_date_sk = d1.d_date_sk and sr_returned_date_sk = d2.d_date_sk and store_sales.ss_customer_sk = store_returns.sr_customer_sk and store_sales.ss_store_sk = store.s_store_sk group by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip order by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as 30days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as 3160days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as 6190days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as 91120days ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as 120days from store_sales ,store_returns ,store ,date_dim d1 ,date_dim d2 where d2.d_year = 2000 and d2.d_moy = 9 and store_sales.ss_ticket_number = store_returns.sr_ticket_number and store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_sold_date_sk = d1.d_date_sk and sr_returned_date_sk = d2.d_date_sk and store_sales.ss_customer_sk = store_returns.sr_customer_sk and store_sales.ss_store_sk = store.s_store_sk group by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip order by s_store_name ,s_company_id ,s_street_number ,s_street_name ,s_street_type ,s_suite_number ,s_city ,s_county ,s_state ,s_zip limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_ticket_number is not null and ss_item_sk is not null) and ss_customer_sk is not null) and ss_store_sk is not null) and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col4 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((sr_ticket_number is not null and sr_item_sk is not null) and sr_customer_sk is not null) and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col3 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int), _col1 (type: int), _col2 (type: int) + 1 _col3 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col5 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string), s_company_id (type: int), s_street_number (type: string), s_street_name (type: string), s_street_type (type: string), s_suite_number (type: string), s_city (type: string), s_county (type: string), s_state (type: string), s_zip (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: 
string), _col10 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col5, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col5, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 40177 Data size: 44958623 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 40177 Data size: 44958623 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_year = 2000) and (d_moy = 9)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col5, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 44194 Data size: 49454486 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: string), _col11 (type: int), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), CASE WHEN (((_col5 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 30) and ((_col5 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 60) and ((_col5 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col5 - _col0) > 90) and ((_col5 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col5 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 44194 Data size: 49454486 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 44194 Data size: 49454486 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + sort order: ++++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Statistics: Num rows: 44194 Data size: 49454486 Basic stats: COMPLETE Column stats: NONE + value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 22097 Data size: 24727243 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + sort order: ++++++++++ + Statistics: Num rows: 22097 Data size: 24727243 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 22097 Data size: 24727243 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query52.q.out b/ql/src/test/results/clientpositive/perf/query52.q.out new file mode 100644 index 0000000..f6aecf0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query52.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) ext_price from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,ext_price desc ,brand_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select dt.d_year ,item.i_brand_id brand_id ,item.i_brand brand ,sum(ss_ext_sales_price) ext_price from date_dim dt ,store_sales ,item where dt.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manager_id = 1 and dt.d_moy=12 and dt.d_year=1998 group by dt.d_year ,item.i_brand ,item.i_brand_id order by dt.d_year ,ext_price desc ,brand_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + 
Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dt + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 1998)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1998 (type: int), _col7 (type: int), _col8 (type: string), _col5 (type: decimal(7,2)) + outputColumnNames: _col1, _col7, _col8, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col1 (type: int), _col7 (type: int), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: decimal(17,2)), _col1 (type: int) + sort order: +-+ + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query54.q.out b/ql/src/test/results/clientpositive/perf/query54.q.out new file mode 100644 index 0000000..0b55f85 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query54.q.out @@ -0,0 +1,464 @@ +PREHOOK: query: explain with my_customers as ( select c_customer_sk , c_current_addr_sk from ( select cs_sold_date_sk sold_date_sk, cs_bill_customer_sk customer_sk, cs_item_sk item_sk from catalog_sales union all select ws_sold_date_sk sold_date_sk, 
ws_bill_customer_sk customer_sk, ws_item_sk item_sk from web_sales ) cs_or_ws_sales, item, date_dim, customer where sold_date_sk = d_date_sk and item_sk = i_item_sk and i_category = 'Jewelry' and i_class = 'football' and c_customer_sk = cs_or_ws_sales.customer_sk and d_moy = 3 and d_year = 2000 group by c_customer_sk , c_current_addr_sk ) , my_revenue as ( select c_customer_sk, sum(ss_ext_sales_price) as revenue from my_customers, store_sales, customer_address, store, date_dim where c_current_addr_sk = ca_address_sk and ca_county = s_county and ca_state = s_state and ss_sold_date_sk = d_date_sk and c_customer_sk = ss_customer_sk and d_month_seq between (1203) and (1205) group by c_customer_sk ) , segments as (select cast((revenue/50) as int) as segment from my_revenue ) select segment, count(*) as num_customers, segment*50 as segment_base from segments group by segment order by segment, num_customers limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain with my_customers as ( select c_customer_sk , c_current_addr_sk from ( select cs_sold_date_sk sold_date_sk, cs_bill_customer_sk customer_sk, cs_item_sk item_sk from catalog_sales union all select ws_sold_date_sk sold_date_sk, ws_bill_customer_sk customer_sk, ws_item_sk item_sk from web_sales ) cs_or_ws_sales, item, date_dim, customer where sold_date_sk = d_date_sk and item_sk = i_item_sk and i_category = 'Jewelry' and i_class = 'football' and c_customer_sk = cs_or_ws_sales.customer_sk and d_moy = 3 and d_year = 2000 group by c_customer_sk , c_current_addr_sk ) , my_revenue as ( select c_customer_sk, sum(ss_ext_sales_price) as revenue from my_customers, store_sales, customer_address, store, date_dim where c_current_addr_sk = ca_address_sk and ca_county = s_county and ca_state = s_state and ss_sold_date_sk = d_date_sk and c_customer_sk = ss_customer_sk and d_month_seq between (1203) and (1205) group by c_customer_sk ) , segments as (select cast((revenue/50) as int) as segment from my_revenue ) select segment, count(*) as num_customers, segment*50 as segment_base from segments group by segment order by segment, num_customers limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-11 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_item_sk is not null and cs_sold_date_sk is not null) and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value 
expressions: _col0 (type: int), _col1 (type: int) + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_item_sk is not null and ws_sold_date_sk is not null) and ws_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_item_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((i_category = 'Jewelry') and (i_class = 'football')) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 3) and (d_year = 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 69877 Data size: 100363522 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 69877 Data size: 100363522 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col9, _col10 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col9 (type: int), _col10 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11000000 Data size: 9460222384 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11000000 Data size: 9460222384 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), 
ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 12100000 Data size: 10406244847 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 12100000 Data size: 10406244847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_address_sk is not null and ca_county is not null) and ca_state is not null) (type: boolean) + Statistics: Num rows: 5000000 Data size: 5074399410 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5000000 Data size: 5074399410 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5000000 Data size: 5074399410 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col6, _col7 + Statistics: Num rows: 13310000 Data size: 11446869579 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: string), _col7 (type: string) + sort order: ++ + Map-reduce partition columns: _col6 (type: string), _col7 (type: string) + Statistics: Num rows: 13310000 Data size: 11446869579 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s_county is not null and s_state is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: s_county (type: string), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: string), _col7 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 14641000 Data size: 12591556809 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 14641000 Data size: 12591556809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1203 AND 1205 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 16105100 Data size: 13850712790 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16105100 Data size: 13850712790 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 16105100 Data size: 13850712790 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8052550 Data size: 6925356395 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger((_col1 / 50)) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8052550 
Data size: 6925356395 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8052550 Data size: 6925356395 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8052550 Data size: 6925356395 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4026275 Data size: 3462678197 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), (_col0 * 50) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4026275 Data size: 3462678197 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Statistics: Num rows: 4026275 Data size: 3462678197 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4026275 Data size: 3462678197 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query55.q.out b/ql/src/test/results/clientpositive/perf/query55.q.out new file mode 100644 index 0000000..1c9e757 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query55.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: explain select i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=36 and d_moy=12 and d_year=2001 group by i_brand, i_brand_id order by ext_price desc, i_brand_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_brand_id brand_id, i_brand brand, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item where 
date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and i_manager_id=36 and d_moy=12 and d_year=2001 group by i_brand, i_brand_id order by ext_price desc, i_brand_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 2001)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col8 (type: string), _col5 (type: decimal(7,2)) + outputColumnNames: _col7, _col8, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col7 (type: int), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: decimal(17,2)), _col0 (type: int) + sort order: -+ + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63525 Data size: 91239564 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query64.q.out b/ql/src/test/results/clientpositive/perf/query64.q.out new file mode 100644 index 0000000..958202d --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query64.q.out @@ -0,0 +1,1758 @@ +PREHOOK: query: explain select cs1.product_name ,cs1.store_name 
,cs1.store_zip ,cs1.b_street_number ,cs1.b_streen_name ,cs1.b_city ,cs1.b_zip ,cs1.c_street_number ,cs1.c_street_name ,cs1.c_city ,cs1.c_zip ,cs1.syear ,cs1.cnt ,cs1.s1 ,cs1.s2 ,cs1.s3 ,cs2.s1 ,cs2.s2 ,cs2.s3 ,cs2.syear ,cs2.cnt from (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs1 JOIN (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = 
customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs2 ON cs1.item_sk=cs2.item_sk where cs1.syear = 2000 and cs2.syear = 2000 + 1 and cs2.cnt <= cs1.cnt and cs1.store_name = cs2.store_name and cs1.store_zip = cs2.store_zip order by cs1.product_name ,cs1.store_name ,cs2.cnt +PREHOOK: type: QUERY +POSTHOOK: query: explain select cs1.product_name ,cs1.store_name ,cs1.store_zip ,cs1.b_street_number ,cs1.b_streen_name ,cs1.b_city ,cs1.b_zip ,cs1.c_street_number ,cs1.c_street_name ,cs1.c_city ,cs1.c_zip ,cs1.syear ,cs1.cnt ,cs1.s1 ,cs1.s2 ,cs1.s3 ,cs2.s1 ,cs2.s2 ,cs2.s3 ,cs2.syear ,cs2.cnt from (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = 
cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs1 JOIN (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as store_name ,s_zip as store_zip ,ad1.ca_street_number as b_street_number ,ad1.ca_street_name as b_streen_name ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as c_street_number ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip as c_zip ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*) as cnt ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2 ,sum(ss_coupon_amt) as s3 FROM store_sales JOIN store_returns ON store_sales.ss_item_sk = store_returns.sr_item_sk and store_sales.ss_ticket_number = store_returns.sr_ticket_number JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk= cd1.cd_demo_sk JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk = cd2.cd_demo_sk JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk = hd1.hd_demo_sk JOIN household_demographics hd2 ON customer.c_current_hdemo_sk = hd2.hd_demo_sk JOIN customer_address ad1 ON store_sales.ss_addr_sk = ad1.ca_address_sk JOIN customer_address ad2 ON customer.c_current_addr_sk = ad2.ca_address_sk JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk JOIN item ON store_sales.ss_item_sk = item.i_item_sk JOIN (select cs_item_sk ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund from catalog_sales JOIN catalog_returns ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk and catalog_sales.cs_order_number = catalog_returns.cr_order_number group by cs_item_sk having 
sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)) cs_ui ON store_sales.ss_item_sk = cs_ui.cs_item_sk WHERE cd1.cd_marital_status <> cd2.cd_marital_status and i_color in ('maroon','burnished','dim','steel','navajo','chocolate') and i_current_price between 35 and 35 + 10 and i_current_price between 35 + 1 and 35 + 15 group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year ,d3.d_year ) cs2 ON cs1.item_sk=cs2.item_sk where cs1.syear = 2000 and cs2.syear = 2000 + 1 and cs2.cnt <= cs1.cnt and cs1.store_name = cs2.store_name and cs1.store_zip = cs2.store_zip order by cs1.product_name ,cs1.store_name ,cs2.cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11 + Stage-13 depends on stages: Stage-12 + Stage-14 depends on stages: Stage-13 + Stage-15 depends on stages: Stage-14 + Stage-16 depends on stages: Stage-15, Stage-37 + Stage-17 depends on stages: Stage-16 + Stage-18 depends on stages: Stage-17, Stage-55 + Stage-19 depends on stages: Stage-18 + Stage-36 is a root stage + Stage-37 depends on stages: Stage-36 + Stage-39 is a root stage + Stage-40 depends on stages: Stage-39 + Stage-41 depends on stages: Stage-40 + Stage-42 depends on stages: Stage-41 + Stage-43 depends on stages: Stage-42 + Stage-44 depends on stages: Stage-43 + Stage-45 depends on stages: Stage-44 + Stage-46 depends on stages: Stage-45 + Stage-47 depends on stages: Stage-46 + Stage-48 depends on stages: Stage-47 + Stage-49 depends on stages: Stage-48 + Stage-50 depends on stages: Stage-49 + Stage-51 depends on stages: Stage-50 + Stage-52 depends on stages: Stage-51 + Stage-53 depends on stages: Stage-52 + Stage-54 depends on stages: Stage-53, Stage-73 + Stage-55 depends on stages: Stage-54 + Stage-72 is a root stage + Stage-73 depends on stages: Stage-72 + Stage-0 depends on stages: Stage-19 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((((((ss_item_sk is not null and ss_ticket_number is not null) and ss_customer_sk is not null) and ss_sold_date_sk is not null) and ss_store_sk is not null) and ss_cdemo_sk is not null) and ss_promo_sk is not null) and ss_hdemo_sk is not null) and ss_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_list_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + 
Reduce Output Operator + key expressions: _col1 (type: int), _col8 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col8 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col8 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((c_customer_sk is not null and c_first_sales_date_sk is not null) and c_first_shipto_date_sk is not null) and c_current_cdemo_sk is not null) and c_current_hdemo_sk is not null) and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int), c_first_shipto_date_sk (type: int), c_first_sales_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Reduce 
Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1375000 Data size: 1182527797 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1375000 Data size: 1182527797 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col18 (type: int), _col19 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and (d_year = 2000)) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1512500 Data size: 1300780604 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col19 (type: int) + sort order: + + Map-reduce partition columns: _col19 (type: int) + Statistics: Num rows: 1512500 Data size: 1300780604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col18 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col19 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col23 + Statistics: Num rows: 1663750 Data size: 1430858695 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col18 (type: int) + sort order: + + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 1663750 Data size: 1430858695 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col18 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25 + Statistics: Num rows: 1830125 Data size: 1573944598 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 1830125 Data size: 1573944598 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((s_store_sk is not null and s_store_name is not null) and s_zip is not null) (type: boolean) + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string), s_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25, _col27, _col28 + Statistics: Num rows: 2013137 Data size: 1731339095 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 2013137 Data size: 1731339095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: cd1 + Statistics: Num rows: 667800 Data size: 71765859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25, _col27, _col28, _col30 + Statistics: Num rows: 2214450 Data size: 1904473045 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col15 (type: int) + sort order: + + Map-reduce partition columns: _col15 (type: int) + Statistics: Num rows: 2214450 Data size: 1904473045 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col30 (type: string) + TableScan + alias: cd1 + Statistics: Num rows: 667800 Data size: 71765859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col15 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col7, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col30, _col32 + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col30 <> _col32) (type: boolean) + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)) + outputColumnNames: _col1, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col4, _col5, _col7, _col9 + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col7 (type: int) + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_promo_sk is not null (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28 + Statistics: Num rows: 2679484 Data size: 2304412483 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 2679484 Data size: 2304412483 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: hd1 + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col35 + Statistics: Num rows: 2947432 Data size: 2534853786 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col16 (type: int) + sort order: + + Map-reduce partition columns: _col16 (type: int) + Statistics: Num rows: 2947432 Data size: 2534853786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int) + TableScan + alias: hd1 + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col16 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col9, _col10, _col11, _col17, _col23, _col25, _col27, _col28, _col35, _col37 + Statistics: Num rows: 3242175 Data size: 2788339225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 3242175 Data size: 2788339225 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int), _col37 (type: int) + TableScan + alias: ad1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_city (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col17, _col23, _col25, _col27, _col28, _col35, _col37, _col39, _col40, _col41, _col42 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col17 (type: int) + sort order: + + Map-reduce partition columns: _col17 (type: int) + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: 
decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int), _col37 (type: int), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string) + TableScan + alias: ad1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_city (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col17 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col35, _col37, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col35 (type: int) + sort order: + + Map-reduce partition columns: _col35 (type: int) + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col37 (type: int), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: ib1 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ib_income_band_sk is not null (type: boolean) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ib_income_band_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col35 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col37, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 26620000 Data size: 27016104217 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col37 (type: int) + sort order: + + Map-reduce partition columns: _col37 (type: int) + Statistics: Num rows: 26620000 Data size: 27016104217 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: ib1 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ib_income_band_sk is not null (type: boolean) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ib_income_band_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col37 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45) and i_current_price BETWEEN 36 AND 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 28875 Data size: 41472528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_product_name (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 28875 Data size: 41472528 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28875 Data size: 41472528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47, _col50, _col53 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2000 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string), _col50 (type: int), _col53 (type: string), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + outputColumnNames: _col21, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47, _col50, _col53, _col9, _col10, _col11 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col9), sum(_col10), sum(_col11) + keys: _col21 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string), _col50 (type: int), _col53 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string) + sort order: +++++++++++++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string) + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col15 (type: 
bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string), KEY._col10 (type: string), KEY._col11 (type: string), KEY._col12 (type: string), KEY._col13 (type: int), KEY._col14 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: string), _col13 (type: int), _col11 (type: string), _col12 (type: string), _col0 (type: int), _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col15, _col16, _col17, _col18, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: int), _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + value expressions: _col12 (type: int), _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: string), _col3 (type: string) + 1 _col1 (type: int), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col15, _col16, _col17, _col18, _col31, _col34, _col35, _col36, _col37 + Statistics: Num rows: 35431220 Data size: 35958436491 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: (_col34 <= _col15) (type: boolean) + Statistics: Num rows: 11810406 Data size: 11986144820 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: int), _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)), _col35 (type: decimal(17,2)), _col36 (type: decimal(17,2)), _col37 (type: decimal(17,2)), _col31 (type: int), _col34 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 11810406 Data size: 11986144820 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col20 (type: bigint) + sort order: +++ + Statistics: Num rows: 11810406 Data size: 11986144820 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: int), _col12 (type: bigint), _col13 (type: decimal(17,2)), _col14 (type: decimal(17,2)), _col15 (type: decimal(17,2)), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)), _col19 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: string), VALUE._col9 (type: int), VALUE._col10 (type: bigint), VALUE._col11 (type: decimal(17,2)), VALUE._col12 (type: decimal(17,2)), VALUE._col13 (type: decimal(17,2)), VALUE._col14 (type: decimal(17,2)), VALUE._col15 (type: decimal(17,2)), VALUE._col16 (type: decimal(17,2)), VALUE._col17 (type: int), KEY.reducesinkkey2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 11810406 Data size: 11986144820 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11810406 Data size: 11986144820 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-36 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_order_number is 
not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_item_sk (type: int), cs_order_number (type: int), cs_ext_list_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_refunded_cash (type: decimal(7,2)), cr_reversed_charge (type: decimal(7,2)), cr_store_credit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(7,2)), ((_col5 + _col6) + _col7) (type: decimal(9,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-37 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col1 > (2 * _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-39 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((((((ss_item_sk is not null and ss_ticket_number is not null) and ss_customer_sk is not null) and ss_sold_date_sk is not null) and ss_store_sk is not null) and ss_cdemo_sk is not null) and ss_promo_sk is not null) and ss_hdemo_sk is not null) and ss_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_list_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col8 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col8 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col8 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-40 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: 
int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((c_customer_sk is not null and c_first_sales_date_sk is not null) and c_first_shipto_date_sk is not null) and c_current_cdemo_sk is not null) and c_current_hdemo_sk is not null) and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int), c_first_shipto_date_sk (type: int), c_first_sales_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1250000 Data size: 1075025247 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1375000 Data size: 1182527797 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-41 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1375000 Data size: 1182527797 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col18 (type: int), _col19 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and (d_year = 2001)) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, 
_col17, _col18, _col19 + Statistics: Num rows: 1512500 Data size: 1300780604 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-42 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col19 (type: int) + sort order: + + Map-reduce partition columns: _col19 (type: int) + Statistics: Num rows: 1512500 Data size: 1300780604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col18 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col19 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col23 + Statistics: Num rows: 1663750 Data size: 1430858695 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-43 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col18 (type: int) + sort order: + + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 1663750 Data size: 1430858695 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col18 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25 + Statistics: Num rows: 1830125 Data size: 1573944598 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-44 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 1830125 Data size: 1573944598 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_store_sk is not null and s_store_name is not null) and s_zip is not null) (type: boolean) + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string), s_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 213 Data size: 407034 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25, _col27, _col28 + Statistics: Num rows: 2013137 Data size: 1731339095 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-45 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 2013137 Data size: 1731339095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col15 (type: int), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: cd1 + Statistics: Num rows: 667800 Data size: 71765859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: 
Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col7, _col9, _col10, _col11, _col15, _col16, _col17, _col23, _col25, _col27, _col28, _col30 + Statistics: Num rows: 2214450 Data size: 1904473045 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-46 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col15 (type: int) + sort order: + + Map-reduce partition columns: _col15 (type: int) + Statistics: Num rows: 2214450 Data size: 1904473045 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col30 (type: string) + TableScan + alias: cd1 + Statistics: Num rows: 667800 Data size: 71765859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 333900 Data size: 35882929 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col15 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col7, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col30, _col32 + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col30 <> _col32) (type: boolean) + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col4 (type: int), _col5 (type: int), _col7 (type: int), _col9 (type: decimal(7,2)) + outputColumnNames: _col1, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col4, _col5, _col7, _col9 + Statistics: Num rows: 2435895 Data size: 
2094920394 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-47 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col7 (type: int) + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 2435895 Data size: 2094920394 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_promo_sk is not null (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28 + Statistics: Num rows: 2679484 Data size: 2304412483 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-48 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 2679484 Data size: 2304412483 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col16 (type: int), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string) + TableScan + alias: hd1 + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 
1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col9, _col10, _col11, _col16, _col17, _col23, _col25, _col27, _col28, _col35 + Statistics: Num rows: 2947432 Data size: 2534853786 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-49 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col16 (type: int) + sort order: + + Map-reduce partition columns: _col16 (type: int) + Statistics: Num rows: 2947432 Data size: 2534853786 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int) + TableScan + alias: hd1 + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col16 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col9, _col10, _col11, _col17, _col23, _col25, _col27, _col28, _col35, _col37 + Statistics: Num rows: 3242175 Data size: 2788339225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-50 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 3242175 Data size: 2788339225 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col17 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int), _col37 (type: int) + TableScan + alias: ad1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_city (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 
20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col17, _col23, _col25, _col27, _col28, _col35, _col37, _col39, _col40, _col41, _col42 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-51 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col17 (type: int) + sort order: + + Map-reduce partition columns: _col17 (type: int) + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col35 (type: int), _col37 (type: int), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string) + TableScan + alias: ad1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_city (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col17 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col35, _col37, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-52 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col35 (type: int) + sort order: + + Map-reduce partition columns: _col35 (type: int) + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col37 (type: int), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: ib1 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ib_income_band_sk is not null (type: boolean) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ib_income_band_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col35 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col37, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 26620000 Data size: 27016104217 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-53 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col37 (type: int) + sort order: + + Map-reduce partition columns: _col37 (type: int) + Statistics: Num rows: 26620000 Data size: 27016104217 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: ib1 + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ib_income_band_sk is not null (type: boolean) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ib_income_band_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col37 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47 + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-54 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45) and i_current_price BETWEEN 36 AND 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 28875 Data size: 41472528 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_product_name (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 28875 Data size: 41472528 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28875 Data size: 41472528 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col9, _col10, _col11, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47, _col50, _col53 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2001 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string), _col50 (type: int), _col53 (type: string), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + outputColumnNames: _col21, _col23, _col25, _col27, _col28, _col39, _col40, _col41, _col42, _col44, _col45, _col46, _col47, _col50, _col53, _col9, _col10, _col11 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col9), sum(_col10), sum(_col11) + keys: _col21 (type: int), _col23 (type: int), _col25 (type: int), _col27 (type: string), _col28 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: string), _col42 (type: string), _col44 (type: string), _col45 (type: string), _col46 (type: string), _col47 (type: string), _col50 (type: int), _col53 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-55 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string) + sort order: +++++++++++++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string) + Statistics: Num rows: 64420401 Data size: 65378975037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string), KEY._col10 (type: string), KEY._col11 (type: string), KEY._col12 (type: string), KEY._col13 (type: int), KEY._col14 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col13 (type: int), _col0 (type: int), _col15 (type: bigint), _col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col1, _col12, _col15, _col16, _col17, _col18, _col2, _col3 + Statistics: Num rows: 32210200 Data size: 32689487011 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-72 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_order_number is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_item_sk (type: int), cs_order_number (type: int), cs_ext_list_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cr_item_sk is not null and cr_order_number is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_refunded_cash (type: decimal(7,2)), cr_reversed_charge (type: decimal(7,2)), cr_store_credit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(7,2)), ((_col5 + _col6) + _col7) (type: decimal(9,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-73 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col1 > (2 * _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query68.q.out b/ql/src/test/results/clientpositive/perf/query68.q.out new file mode 100644 index 0000000..83d9fa1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query68.q.out @@ -0,0 +1,351 @@ +PREHOOK: query: explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,extended_price ,extended_tax ,list_price from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_ext_sales_price) extended_price ,sum(ss_ext_list_price) list_price ,sum(ss_ext_tax) extended_tax from store_sales ,date_dim ,store ,household_demographics ,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem') group by ss_ticket_number ,ss_customer_sk ,ss_addr_sk,ca_city) dn ,customer ,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,ss_ticket_number limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_last_name ,c_first_name ,ca_city ,bought_city ,ss_ticket_number ,extended_price ,extended_tax ,list_price from (select ss_ticket_number ,ss_customer_sk ,ca_city bought_city ,sum(ss_ext_sales_price) extended_price ,sum(ss_ext_list_price) list_price ,sum(ss_ext_tax) extended_tax from store_sales ,date_dim ,store ,household_demographics ,customer_address where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_addr_sk = customer_address.ca_address_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_dep_count = 4 or household_demographics.hd_vehicle_count= 2) and date_dim.d_year in (1998,1998+1,1998+2) and store.s_city in ('Rosedale','Bethlehem') group by ss_ticket_number ,ss_customer_sk ,ss_addr_sk,ca_city) dn ,customer ,customer_address current_addr where dn.ss_customer_sk = customer.c_customer_sk and customer.c_current_addr_sk = current_addr.ca_address_sk and current_addr.ca_city <> bought_city order by c_last_name ,ss_ticket_number limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_hdemo_sk is not null) and ss_addr_sk is not null) and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk 
(type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2)), ss_ext_tax (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_city) IN ('Rosedale', 'Bethlehem') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hd_dep_count = 4) or (hd_vehicle_count = 2)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col18 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col18 (type: string), _col6 (type: decimal(7,2)), 
_col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + outputColumnNames: _col1, _col3, _col5, _col18, _col6, _col7, _col8 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6), sum(_col7), sum(_col8) + keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col18 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 
20000000 Data size: 17200403963 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col7 (type: int) + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col8 (type: string), _col9 (type: string) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col8, _col9, _col11 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col11 <> _col2) (type: boolean) + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col8 (type: string), _col11 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col4 (type: int) + sort order: ++ + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 
(type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query7.q.out b/ql/src/test/results/clientpositive/perf/query7.q.out new file mode 100644 index 0000000..83451c2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query7.q.out @@ -0,0 +1,254 @@ +PREHOOK: query: explain select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and store_sales.ss_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, item, promotion where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and store_sales.ss_promo_sk = promotion.p_promo_sk and cd_gender = 'F' and cd_marital_status = 'W' and cd_education_status = 'Primary' and (p_channel_email = 'N' or p_channel_event = 'N') and d_year = 1998 group by i_item_id order by i_item_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_cdemo_sk is not null and ss_sold_date_sk is not null) and ss_item_sk is not null) and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_promo_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: customer_demographics + Statistics: Num rows: 715800 Data size: 71770659 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((cd_gender = 'F') and (cd_marital_status = 'W')) and (cd_education_status = 'Primary')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 44738 Data size: 4485716 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 44738 Data size: 4485716 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44738 Data size: 4485716 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 49211 Data size: 4934287 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 49211 Data size: 4934287 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 54132 Data size: 5427715 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 54132 Data size: 5427715 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col15 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col15 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col15, _col4, _col5, _col7, _col6 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6) + keys: 
_col15 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out new file mode 100644 index 0000000..114c702 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query70.q.out @@ -0,0 +1,441 @@ +PREHOOK: query: explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by 
sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-7 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-8 is a root stage + Stage-9 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-11 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + 
Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1841 Data size: 2060546 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col6, _col7 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col7 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col6, _col7, _col2 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col6 (type: string), _col7 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: decimal(17,2)), _col2 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string), CASE WHEN ((_col5 = 2)) THEN (_col0) END (type: string), _col4 (type: decimal(17,2)) + sort order: +++ + Map-reduce partition columns: _col5 (type: 
string), CASE WHEN ((_col5 = 2)) THEN (_col0) END (type: string) + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col4: decimal(17,2), _col5: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 + partition by: _col5, CASE WHEN ((_col5 = 2)) THEN (_col0) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col4 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: string), _col5 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string), CASE WHEN ((_col3 = 0)) THEN (_col1) END (type: string), _col4 (type: int) + sort order: -++ + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: s + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 937 Data size: 1790951 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col2 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1(DESC) + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((rank_window_0 <= 5) and _col0 is not null) (type: boolean) + Statistics: Num rows: 1674 Data size: 1873224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1674 Data size: 1873224 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1674 Data size: 1873224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s_state is not null and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_county (type: string), s_state (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1674 Data size: 1873224 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1841 Data size: 2060546 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query71.q.out b/ql/src/test/results/clientpositive/perf/query71.q.out new file mode 100644 index 0000000..c37c978 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query71.q.out @@ -0,0 +1,343 @@ +PREHOOK: query: explain select i_brand_id brand_id, i_brand brand,t_hour,t_minute, sum(ext_price) ext_price from item JOIN (select ws_ext_sales_price as ext_price, ws_sold_date_sk as sold_date_sk, ws_item_sk as sold_item_sk, ws_sold_time_sk as time_sk from web_sales,date_dim where date_dim.d_date_sk = web_sales.ws_sold_date_sk and d_moy=12 and d_year=2001 union all select cs_ext_sales_price as ext_price, cs_sold_date_sk as sold_date_sk, cs_item_sk as sold_item_sk, cs_sold_time_sk as time_sk from catalog_sales,date_dim where date_dim.d_date_sk = catalog_sales.cs_sold_date_sk and d_moy=12 and d_year=2001 union all select ss_ext_sales_price as ext_price, ss_sold_date_sk as sold_date_sk, ss_item_sk as sold_item_sk, ss_sold_time_sk as time_sk from store_sales,date_dim where date_dim.d_date_sk = store_sales.ss_sold_date_sk and d_moy=12 and d_year=2001 ) tmp ON tmp.sold_item_sk = item.i_item_sk JOIN time_dim ON tmp.time_sk = time_dim.t_time_sk where i_manager_id=1 and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') group by i_brand, i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_brand_id brand_id, i_brand brand,t_hour,t_minute, sum(ext_price) ext_price from item JOIN (select ws_ext_sales_price as ext_price, ws_sold_date_sk as sold_date_sk, ws_item_sk as sold_item_sk, ws_sold_time_sk as time_sk from web_sales,date_dim where date_dim.d_date_sk = web_sales.ws_sold_date_sk and d_moy=12 and d_year=2001 union all select cs_ext_sales_price as ext_price, cs_sold_date_sk as sold_date_sk, cs_item_sk as sold_item_sk, cs_sold_time_sk as time_sk from catalog_sales,date_dim where date_dim.d_date_sk = catalog_sales.cs_sold_date_sk and d_moy=12 and d_year=2001 union all select ss_ext_sales_price as ext_price, ss_sold_date_sk as sold_date_sk, ss_item_sk as sold_item_sk, ss_sold_time_sk as time_sk from store_sales,date_dim where date_dim.d_date_sk = store_sales.ss_sold_date_sk and d_moy=12 and d_year=2001 ) tmp ON tmp.sold_item_sk = item.i_item_sk JOIN time_dim ON tmp.time_sk = time_dim.t_time_sk where i_manager_id=1 and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') group by i_brand, 
i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5, Stage-8, Stage-10 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-8 is a root stage + Stage-10 is a root stage + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_sold_date_sk is not null and ws_item_sk is not null) and ws_sold_time_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 2001)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + TableScan + Union + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) + TableScan + Union + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) + TableScan + Union + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 30132 Data size: 33718044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col4 (type: decimal(7,2)) + TableScan + alias: time_dim + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4, _col8, _col9 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: string), _col8 (type: int), _col9 (type: int), _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col8, 
_col9, _col4 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col1 (type: int), _col2 (type: string), _col8 (type: int), _col9 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: decimal(17,2)), _col0 (type: int) + sort order: -+ + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_sold_date_sk is not null and cs_item_sk is not null) and cs_sold_time_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 2001)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ss_sold_date_sk is not null and ss_item_sk is not null) and ss_sold_time_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_sold_time_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_moy = 12) and (d_year = 2001)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + 
Select Operator + expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query72.q.out b/ql/src/test/results/clientpositive/perf/query72.q.out new file mode 100644 index 0000000..545e20b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query72.q.out @@ -0,0 +1,518 @@ +PREHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq ,count(case when p_promo_sk is null then 1 else 0 end) no_promo ,count(case when p_promo_sk is not null then 1 else 0 end) promo ,count(*) total_cnt from catalog_sales join inventory on (catalog_sales.cs_item_sk = inventory.inv_item_sk) join warehouse on (warehouse.w_warehouse_sk=inventory.inv_warehouse_sk) join item on (item.i_item_sk = catalog_sales.cs_item_sk) join customer_demographics on (catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk) join household_demographics on (catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk) join date_dim d1 on (catalog_sales.cs_sold_date_sk = d1.d_date_sk) join date_dim d2 on (inventory.inv_date_sk = d2.d_date_sk) join date_dim d3 on (catalog_sales.cs_ship_date_sk = d3.d_date_sk) left outer join promotion on (catalog_sales.cs_promo_sk=promotion.p_promo_sk) left outer join catalog_returns on (catalog_returns.cr_item_sk = catalog_sales.cs_item_sk and catalog_returns.cr_order_number = catalog_sales.cs_order_number) where d1.d_week_seq = d2.d_week_seq and inv_quantity_on_hand < cs_quantity and d3.d_date > d1.d_date + 5 and hd_buy_potential = '1001-5000' and d1.d_year = 2001 and hd_buy_potential = '1001-5000' and cd_marital_status = 'M' and d1.d_year = 2001 group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq ,count(case when p_promo_sk is null then 1 else 0 end) no_promo ,count(case when p_promo_sk is not null then 1 else 0 end) promo ,count(*) total_cnt from catalog_sales join inventory on (catalog_sales.cs_item_sk = inventory.inv_item_sk) join warehouse on (warehouse.w_warehouse_sk=inventory.inv_warehouse_sk) join item on (item.i_item_sk = catalog_sales.cs_item_sk) join customer_demographics on (catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk) join household_demographics on (catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk) join date_dim d1 on (catalog_sales.cs_sold_date_sk = d1.d_date_sk) join date_dim d2 on (inventory.inv_date_sk = d2.d_date_sk) join date_dim d3 on (catalog_sales.cs_ship_date_sk = d3.d_date_sk) left outer join promotion on (catalog_sales.cs_promo_sk=promotion.p_promo_sk) left outer join catalog_returns on (catalog_returns.cr_item_sk = catalog_sales.cs_item_sk and catalog_returns.cr_order_number = catalog_sales.cs_order_number) where d1.d_week_seq = d2.d_week_seq and inv_quantity_on_hand < cs_quantity and d3.d_date > d1.d_date + 5 and hd_buy_potential = '1001-5000' and d1.d_year = 2001 and hd_buy_potential = 
'1001-5000' and cd_marital_status = 'M' and d1.d_year = 2001 group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11 + Stage-0 depends on stages: Stage-12 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((cs_item_sk is not null and cs_bill_cdemo_sk is not null) and cs_bill_hdemo_sk is not null) and cs_sold_date_sk is not null) and cs_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_item_sk is not null and inv_warehouse_sk is not null) and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col11 < _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col10 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col10, _col2, _col3, _col4, _col5, 
_col6, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: int) + sort order: + + Map-reduce partition columns: _col10 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int) + TableScan + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 14415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col13 + Statistics: Num rows: 15 Data size: 15856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 15 Data size: 15856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int), _col13 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col13, _col15 + Statistics: Num rows: 254100 
Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int), _col13 (type: string), _col15 (type: string) + TableScan + alias: customer_demographics + Statistics: Num rows: 787800 Data size: 71777859 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 196950 Data size: 17944464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 196950 Data size: 17944464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 196950 Data size: 17944464 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col8, _col13, _col15 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int), _col13 (type: string), _col15 (type: string) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col8, _col13, _col15 + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 307461 Data size: 441599510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col8 (type: int), _col13 (type: string), _col15 (type: string) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_year = 2001) and d_date_sk is not null) and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string), d_week_seq (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col8, _col13, _col15, _col21, _col22 + Statistics: Num rows: 338207 Data size: 485759471 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: int), _col22 (type: int) + sort order: ++ + Map-reduce partition columns: _col8 (type: int), _col22 (type: int) + Statistics: Num rows: 338207 Data size: 485759471 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col13 (type: string), _col15 (type: string), _col21 (type: string) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col8 (type: int), _col22 (type: int) + 1 _col0 (type: int), _col1 
(type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col13, _col15, _col21, _col22 + Statistics: Num rows: 372027 Data size: 534335429 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 372027 Data size: 534335429 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: int), _col13 (type: string), _col15 (type: string), _col21 (type: string), _col22 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col13, _col15, _col21, _col22, _col27 + Statistics: Num rows: 409229 Data size: 587768984 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col27) > (UDFToDouble(_col21) + 5.0)) (type: boolean) + Statistics: Num rows: 136409 Data size: 195922037 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col13, _col15, _col22, _col4, _col5, _col6 + Statistics: Num rows: 136409 Data size: 195922037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 136409 Data size: 195922037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col6 (type: int), _col13 (type: string), _col15 (type: string), _col22 (type: int) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 
2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 + Statistics: Num rows: 150049 Data size: 215514245 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int), _col6 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: int), _col6 (type: int) + Statistics: Num rows: 150049 Data size: 215514245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col4 (type: int), _col6 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col13, _col15, _col22, _col28 + Statistics: Num rows: 165053 Data size: 237065674 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col13 (type: string), _col22 (type: int), CASE WHEN (_col28 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col28 is not null) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 165053 Data size: 237065674 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col3), count(_col4), count() + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 165053 Data size: 237065674 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 165053 Data size: 237065674 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 82526 Data size: 118532118 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: -+++ + Statistics: Num rows: 82526 Data size: 118532118 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 82526 Data size: 118532118 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query73.q.out b/ql/src/test/results/clientpositive/perf/query73.q.out new file mode 100644 index 0000000..e2fd84b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query73.q.out @@ -0,0 +1,257 @@ +PREHOOK: query: explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_buy_potential = '1001-5000' or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County') group by ss_ticket_number,ss_customer_sk) dj,customer where dj.ss_customer_sk = customer.c_customer_sk and cnt between 1 and 5 order by cnt desc +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_last_name ,c_first_name ,c_salutation ,c_preferred_cust_flag ,ss_ticket_number ,cnt from (select ss_ticket_number ,ss_customer_sk ,count(*) cnt from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and date_dim.d_dom between 1 and 2 and (household_demographics.hd_buy_potential = '1001-5000' 
or household_demographics.hd_buy_potential = '5001-10000') and household_demographics.hd_vehicle_count > 0 and case when household_demographics.hd_vehicle_count > 0 then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_county in ('Kittitas County','Adams County','Richland County','Furnas County') group by ss_ticket_number,ss_customer_sk) dj,customer where dj.ss_customer_sk = customer.c_customer_sk and cnt between 1 and 5 order by cnt desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_hdemo_sk is not null) and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + TableScan + alias: store + Statistics: Num rows: 1704 Data 
size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_county) IN ('Kittitas County', 'Adams County', 'Richland County', 'Furnas County') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((hd_buy_potential = '1001-5000') or (hd_buy_potential = '5001-10000')) and (hd_vehicle_count > 0)) and (CASE WHEN ((hd_vehicle_count > 0)) THEN ((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count))) ELSE (null) END > 1.0)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 400 Data size: 42800 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6076 Data size: 6799805 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 BETWEEN 1 AND 5 (type: boolean) + Statistics: Num rows: 3038 Data size: 3399902 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3038 Data size: 3399902 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3038 Data size: 3399902 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: bigint) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col5 (type: string), _col4 (type: string), _col7 (type: string), _col0 (type: int), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: bigint) + sort order: - + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 
(type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query75.q.out b/ql/src/test/results/clientpositive/perf/query75.q.out new file mode 100644 index 0000000..177f475 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query75.q.out @@ -0,0 +1,1042 @@ +PREHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,SUM(sales_cnt) AS sales_cnt ,SUM(sales_amt) AS sales_amt FROM (SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk JOIN date_dim ON d_date_sk=cs_sold_date_sk LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number AND cs_item_sk=cr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk=ss_item_sk JOIN date_dim ON d_date_sk=ss_sold_date_sk LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number AND ss_item_sk=sr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk=ws_item_sk JOIN date_dim ON d_date_sk=ws_sold_date_sk LEFT JOIN web_returns ON (ws_order_number=wr_order_number AND ws_item_sk=wr_item_sk) WHERE i_category='Sports') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year ,curr_yr.d_year AS year ,curr_yr.i_brand_id ,curr_yr.i_class_id ,curr_yr.i_category_id ,curr_yr.i_manufact_id ,prev_yr.sales_cnt AS prev_yr_cnt ,curr_yr.sales_cnt AS curr_yr_cnt ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id=prev_yr.i_brand_id AND curr_yr.i_class_id=prev_yr.i_class_id AND curr_yr.i_category_id=prev_yr.i_category_id AND curr_yr.i_manufact_id=prev_yr.i_manufact_id AND curr_yr.d_year=2002 AND prev_yr.d_year=2002-1 AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 ORDER BY sales_cnt_diff limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,SUM(sales_cnt) AS sales_cnt ,SUM(sales_amt) AS sales_amt FROM (SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt ,cs_ext_sales_price - 
COALESCE(cr_return_amount,0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk JOIN date_dim ON d_date_sk=cs_sold_date_sk LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number AND cs_item_sk=cr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk=ss_item_sk JOIN date_dim ON d_date_sk=ss_sold_date_sk LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number AND ss_item_sk=sr_item_sk) WHERE i_category='Sports' UNION ALL SELECT d_year ,i_brand_id ,i_class_id ,i_category_id ,i_manufact_id ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk=ws_item_sk JOIN date_dim ON d_date_sk=ws_sold_date_sk LEFT JOIN web_returns ON (ws_order_number=wr_order_number AND ws_item_sk=wr_item_sk) WHERE i_category='Sports') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year ,curr_yr.d_year AS year ,curr_yr.i_brand_id ,curr_yr.i_class_id ,curr_yr.i_category_id ,curr_yr.i_manufact_id ,prev_yr.sales_cnt AS prev_yr_cnt ,curr_yr.sales_cnt AS curr_yr_cnt ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id=prev_yr.i_brand_id AND curr_yr.i_class_id=prev_yr.i_class_id AND curr_yr.i_category_id=prev_yr.i_category_id AND curr_yr.i_manufact_id=prev_yr.i_manufact_id AND curr_yr.d_year=2002 AND prev_yr.d_year=2002-1 AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 ORDER BY sales_cnt_diff limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-12, Stage-18 + Stage-5 depends on stages: Stage-4, Stage-25 + Stage-6 depends on stages: Stage-5 + Stage-10 is a root stage + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11 + Stage-16 is a root stage + Stage-17 depends on stages: Stage-16 + Stage-18 depends on stages: Stage-17 + Stage-22 is a root stage + Stage-23 depends on stages: Stage-22 + Stage-24 depends on stages: Stage-23 + Stage-25 depends on stages: Stage-24, Stage-31, Stage-37 + Stage-29 is a root stage + Stage-30 depends on stages: Stage-29 + Stage-31 depends on stages: Stage-30 + Stage-35 is a root stage + Stage-36 depends on stages: Stage-35 + Stage-37 depends on stages: Stage-36 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce 
partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2002 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + 
Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: ++++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col5 (type: bigint), _col6 (type: double) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: ++++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col5 (type: bigint), _col6 (type: double) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col12, _col13 + Statistics: Num rows: 36458 Data size: 40798834 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((CAST( _col5 AS decimal(17,2)) / CAST( _col12 AS decimal(17,2))) < 0.9) (type: boolean) + Statistics: Num rows: 
12152 Data size: 13598865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col12 (type: bigint), _col5 (type: bigint), (_col5 - _col12) (type: bigint), (_col6 - _col13) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 12152 Data size: 13598865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: bigint) + sort order: + + Statistics: Num rows: 12152 Data size: 13598865 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint), _col9 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: bigint), VALUE._col7 (type: bigint), KEY.reducesinkkey0 (type: bigint), VALUE._col8 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 12152 Data size: 13598865 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 111900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2002 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2002 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: web_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value 
expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-22 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-23 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2001 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-24 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 
(type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-25 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + TableScan + Union + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 66288 Data size: 74179698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + 
Statistics: Num rows: 33144 Data size: 37089849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-29 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-30 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2001 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-31 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-35 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_item_sk is not 
null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((i_category = 'Sports') and i_item_sk is not null) and i_brand_id is not null) and i_class_id is not null) and i_category_id is not null) and i_manufact_id is not null) (type: boolean) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7219 Data size: 10368491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10 + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-36 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7940 Data size: 11405340 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: 2001 
(type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-37 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), _col12 (type: int) + TableScan + alias: web_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col15, _col16 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col10 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (UDFToDouble(_col4) - COALESCE(_col16,0.0)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query76.q.out b/ql/src/test/results/clientpositive/perf/query76.q.out new file mode 100644 index 0000000..dac3dc1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query76.q.out @@ -0,0 +1,401 @@ +PREHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_addr_sk IS NULL AND store_sales.ss_sold_date_sk=date_dim.d_date_sk AND store_sales.ss_item_sk=item.i_item_sk UNION ALL SELECT 'web' as channel, 
'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_web_page_sk IS NULL AND web_sales.ws_sold_date_sk=date_dim.d_date_sk AND web_sales.ws_item_sk=item.i_item_sk UNION ALL SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_warehouse_sk IS NULL AND catalog_sales.cs_sold_date_sk=date_dim.d_date_sk AND catalog_sales.cs_item_sk=item.i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_addr_sk IS NULL AND store_sales.ss_sold_date_sk=date_dim.d_date_sk AND store_sales.ss_item_sk=item.i_item_sk UNION ALL SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_web_page_sk IS NULL AND web_sales.ws_sold_date_sk=date_dim.d_date_sk AND web_sales.ws_item_sk=item.i_item_sk UNION ALL SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_warehouse_sk IS NULL AND catalog_sales.cs_sold_date_sk=date_dim.d_date_sk AND catalog_sales.cs_item_sk=item.i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-8, Stage-12 + Stage-4 depends on stages: Stage-3 + Stage-7 is a root stage + Stage-8 depends on stages: Stage-7 + Stage-11 is a root stage + Stage-12 depends on stages: Stage-11 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ss_addr_sk is null and ss_item_sk is not null) and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_category (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col5 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'store' (type: string), 'ss_addr_sk' (type: string), _col7 (type: int), _col8 (type: int), _col5 (type: string), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col5) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: 
string) + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) + TableScan + Union + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col5) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) + TableScan + Union + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col5) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Statistics: Num rows: 838530 Data size: 1204362276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 419265 Data size: 602181138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + sort order: +++++ + Statistics: Num rows: 419265 Data size: 602181138 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 419265 Data size: 602181138 Basic stats: COMPLETE Column 
stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_web_page_sk is null and ws_item_sk is not null) and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_category (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col5 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) 
+ sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col7 (type: int), _col8 (type: int), _col5 (type: string), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cs_warehouse_sk is null and cs_item_sk is not null) and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_category (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col5 + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col3 (type: decimal(7,2)), _col5 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col7 (type: int), _col8 (type: int), _col5 (type: string), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query79.q.out b/ql/src/test/results/clientpositive/perf/query79.q.out new file mode 100644 index 0000000..1b5b39e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query79.q.out @@ -0,0 +1,263 @@ +PREHOOK: query: explain select c_last_name,c_first_name,substr(s_city,1,30) sub,ss_ticket_number,amt,profit from (select ss_ticket_number ,ss_customer_sk ,store.s_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) and date_dim.d_dow = 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_number_employees between 200 and 295 group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer where ms.ss_customer_sk = customer.c_customer_sk order by c_last_name,c_first_name,sub, profit limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_last_name,c_first_name,substr(s_city,1,30) sub,ss_ticket_number,amt,profit from (select ss_ticket_number ,ss_customer_sk ,store.s_city ,sum(ss_coupon_amt) amt ,sum(ss_net_profit) profit from store_sales,date_dim,store,household_demographics where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and (household_demographics.hd_dep_count = 8 or household_demographics.hd_vehicle_count > 0) and date_dim.d_dow = 1 and date_dim.d_year in (1998,1998+1,1998+2) and store.s_number_employees between 200 and 295 
group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer where ms.ss_customer_sk = customer.c_customer_sk order by c_last_name,c_first_name,sub, profit limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_hdemo_sk is not null) and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_coupon_amt (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_dow = 1) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
(s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_city (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col13 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col13 (type: string) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3000 Data size: 321000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3000 Data size: 321000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3000 Data size: 321000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col13 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col13 (type: string), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + outputColumnNames: _col1, _col3, _col5, _col13, _col6, _col7 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6), sum(_col7) + keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col13 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + 
Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 12152 Data size: 13599611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6076 Data size: 6799805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6076 Data size: 6799805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6076 Data size: 6799805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col6 (type: string), substr(_col2, 1, 30) (type: string), _col0 (type: int), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator 
Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)) + sort order: ++++ + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: int), _col4 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query80.q.out b/ql/src/test/results/clientpositive/perf/query80.q.out new file mode 100644 index 0000000..181ce78 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query80.q.out @@ -0,0 +1,889 @@ +PREHOOK: query: explain with ssr as (select s_store_id as store_id, sum(ss_ext_sales_price) as sales, sum(coalesce(sr_return_amt, 0)) as returns, sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit from store_sales left outer join store_returns on (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion where ss_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ss_store_sk = s_store_sk and ss_item_sk = i_item_sk and i_current_price > 50 and ss_promo_sk = p_promo_sk and p_channel_tv = 'N' group by s_store_id) , csr as (select cp_catalog_page_id as catalog_page_id, sum(cs_ext_sales_price) as sales, sum(coalesce(cr_return_amount, 0)) as returns, sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit from catalog_sales left outer join catalog_returns on (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion where cs_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and cs_catalog_page_sk = cp_catalog_page_sk and cs_item_sk = i_item_sk and i_current_price > 50 and cs_promo_sk = p_promo_sk and p_channel_tv = 'N' group by cp_catalog_page_id) , wsr as (select web_site_id, sum(ws_ext_sales_price) as sales, sum(coalesce(wr_return_amt, 0)) as returns, sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit from web_sales left outer join web_returns on (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), date_dim, web_site, item, promotion where ws_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ws_web_site_sk = web_site_sk and ws_item_sk = i_item_sk and i_current_price > 50 and ws_promo_sk = p_promo_sk and p_channel_tv = 'N' group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel 
, concat('store', store_id) as id , sales , returns , profit from ssr union all select 'catalog channel' as channel , concat('catalog_page', catalog_page_id) as id , sales , returns , profit from csr union all select 'web channel' as channel , concat('web_site', web_site_id) as id , sales , returns , profit from wsr ) x group by channel, id with rollup order by channel ,id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain with ssr as (select s_store_id as store_id, sum(ss_ext_sales_price) as sales, sum(coalesce(sr_return_amt, 0)) as returns, sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit from store_sales left outer join store_returns on (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion where ss_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ss_store_sk = s_store_sk and ss_item_sk = i_item_sk and i_current_price > 50 and ss_promo_sk = p_promo_sk and p_channel_tv = 'N' group by s_store_id) , csr as (select cp_catalog_page_id as catalog_page_id, sum(cs_ext_sales_price) as sales, sum(coalesce(cr_return_amount, 0)) as returns, sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit from catalog_sales left outer join catalog_returns on (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion where cs_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and cs_catalog_page_sk = cp_catalog_page_sk and cs_item_sk = i_item_sk and i_current_price > 50 and cs_promo_sk = p_promo_sk and p_channel_tv = 'N' group by cp_catalog_page_id) , wsr as (select web_site_id, sum(ws_ext_sales_price) as sales, sum(coalesce(wr_return_amt, 0)) as returns, sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit from web_sales left outer join web_returns on (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), date_dim, web_site, item, promotion where ws_sold_date_sk = d_date_sk and d_date between cast('1998-08-04' as date) and (cast('1998-09-04' as date)) and ws_web_site_sk = web_site_sk and ws_item_sk = i_item_sk and i_current_price > 50 and ws_promo_sk = p_promo_sk and p_channel_tv = 'N' group by web_site_id) select channel , id , sum(sales) as sales , sum(returns) as returns , sum(profit) as profit from (select 'store channel' as channel , concat('store', store_id) as id , sales , returns , profit from ssr union all select 'catalog channel' as channel , concat('catalog_page', catalog_page_id) as id , sales , returns , profit from csr union all select 'web channel' as channel , concat('web_site', web_site_id) as id , sales , returns , profit from wsr ) x group by channel, id with rollup order by channel ,id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6, Stage-19, Stage-30 + Stage-8 depends on stages: Stage-7 + Stage-14 is a root stage + Stage-15 depends on stages: Stage-14 + Stage-16 depends on stages: Stage-15 + Stage-17 depends on stages: Stage-16 + Stage-18 depends on stages: Stage-17 + Stage-19 depends on stages: Stage-18 + Stage-25 is a root stage + Stage-26 depends on stages: Stage-25 + Stage-27 depends on stages: Stage-26 + Stage-28 depends on stages: Stage-27 + Stage-29 depends on stages: Stage-28 + Stage-30 depends on 
stages: Stage-29 + Stage-0 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_item_sk is not null) and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: sr_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN 1998-08-04 AND 1998-09-04 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string) 
+ mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 139755 Data size: 200727045 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3), sum(_col4) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) + TableScan + Union + Statistics: Num rows: 139755 Data size: 200727045 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3), sum(_col4) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) + TableScan + Union + Statistics: Num rows: 139755 Data size: 200727045 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3), sum(_col4) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 419265 Data size: 602181135 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 209632 Data size: 301089849 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 209632 Data size: 301089849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 209632 Data size: 301089849 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(32,2)), _col4 (type: decimal(33,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(32,2)), VALUE._col2 (type: decimal(33,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 209632 Data size: 301089849 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((cs_sold_date_sk is not null and cs_catalog_page_sk is not null) and cs_item_sk is not null) and cs_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_catalog_page_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_amount (type: 
decimal(7,2)), cr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN 1998-08-04 AND 1998-09-04 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: catalog_page + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cp_catalog_page_sk is not null (type: boolean) + Statistics: Num rows: 23000 Data size: 10599404 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23000 Data size: 10599404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23000 Data size: 10599404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 25300 Data size: 11659344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 25300 Data size: 11659344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) 
+ outputColumnNames: _col0 + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-25 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ws_sold_date_sk is not null and ws_web_site_sk is not null) and ws_item_sk is not null) and ws_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_site_sk (type: int), ws_promo_sk (type: int), ws_order_number (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce 
Output Operator + key expressions: _col1 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + TableScan + alias: web_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: wr_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-26 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN 1998-08-04 AND 1998-09-04 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-27 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + TableScan + alias: web_site + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: web_site_sk is not null (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: web_site_sk (type: int), web_site_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-28 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 77000 Data size: 110593409 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-29 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 
(type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 84700 Data size: 121652752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: string) + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575 Data size: 678355 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col14 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col14 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-30 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 93170 Data size: 133818030 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 46585 Data size: 66909015 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git 
a/ql/src/test/results/clientpositive/perf/query82.q.out b/ql/src/test/results/clientpositive/perf/query82.q.out new file mode 100644 index 0000000..d85d8c1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query82.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price from item, inventory, date_dim, store_sales where i_current_price between 30 and 30+30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk and d_date between '2002-05-30' and '2002-07-30' and i_manufact_id in (437,129,727,663) and inv_quantity_on_hand between 100 and 500 and ss_item_sk = i_item_sk group by i_item_id,i_item_desc,i_current_price order by i_item_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price from item, inventory, date_dim, store_sales where i_current_price between 30 and 30+30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk and d_date between '2002-05-30' and '2002-07-30' and i_manufact_id in (437,129,727,663) and inv_quantity_on_hand between 100 and 500 and ss_item_sk = i_item_sk group by i_item_id,i_item_desc,i_current_price order by i_item_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_current_price BETWEEN 30 AND 60 and (i_manufact_id) IN (437, 129, 727, 663)) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57750 Data size: 82945057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_quantity_on_hand BETWEEN 100 AND 500 and inv_item_sk is not null) and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ss_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int) + outputColumnNames: _col0 + Statistics: 
Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN '2002-05-30' AND '2002-07-30' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: decimal(7,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 143600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query84.q.out b/ql/src/test/results/clientpositive/perf/query84.q.out new file mode 100644 index 0000000..eef3bf6 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query84.q.out @@ -0,0 +1,240 @@ +PREHOOK: query: explain select c_customer_id as customer_id ,concat(c_last_name, ', ', c_first_name) as customername from customer ,customer_address ,customer_demographics ,household_demographics ,income_band ,store_returns where ca_city = 'Hopewell' and customer.c_current_addr_sk = customer_address.ca_address_sk and ib_lower_bound >= 32287 and ib_upper_bound <= 32287 + 50000 and income_band.ib_income_band_sk = household_demographics.hd_income_band_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and store_returns.sr_cdemo_sk = customer_demographics.cd_demo_sk order by customer_id limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select c_customer_id as customer_id ,concat(c_last_name, ', ', c_first_name) as customername from customer ,customer_address ,customer_demographics ,household_demographics ,income_band ,store_returns where ca_city = 'Hopewell' and customer.c_current_addr_sk = customer_address.ca_address_sk and ib_lower_bound >= 32287 and ib_upper_bound <= 32287 + 50000 and income_band.ib_income_band_sk = household_demographics.hd_income_band_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and store_returns.sr_cdemo_sk = customer_demographics.cd_demo_sk order by customer_id limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_current_addr_sk is not null and c_current_cdemo_sk is not null) and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num 
rows: 10000000 Data size: 8600201981 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_id (type: string), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10000000 Data size: 8600201981 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 10000000 Data size: 8600201981 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: string) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_city = 'Hopewell') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 11000000 Data size: 9460222384 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 11000000 Data size: 9460222384 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: int), _col4 (type: string), _col5 (type: string) + TableScan + alias: customer_demographics + Statistics: Num rows: 955800 Data size: 71794659 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 477900 Data size: 35897329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 477900 Data size: 35897329 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 477900 Data size: 35897329 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: sr_cdemo_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_cdemo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 
Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5 + Statistics: Num rows: 24200000 Data size: 20812489695 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 24200000 Data size: 20812489695 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hd_demo_sk is not null and hd_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int), hd_income_band_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col5, _col11 + Statistics: Num rows: 26620000 Data size: 22893739160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 26620000 Data size: 22893739160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col4 (type: string), _col5 (type: string) + TableScan + alias: income_band + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287)) and ib_income_band_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ib_income_band_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num 
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col5 + Statistics: Num rows: 29282000 Data size: 25183113621 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), concat(_col5, ', ', _col4) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29282000 Data size: 25183113621 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 29282000 Data size: 25183113621 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29282000 Data size: 25183113621 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out b/ql/src/test/results/clientpositive/perf/query85.q.out new file mode 100644 index 0000000..89d8a10 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query85.q.out @@ -0,0 +1,400 @@ +PREHOOK: query: explain select substr(r_reason_desc,1,20) as r ,avg(ws_quantity) wq ,avg(wr_refunded_cash) ref ,avg(wr_fee) fee from web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason where web_sales.ws_web_page_sk = web_page.wp_web_page_sk and web_sales.ws_item_sk = web_returns.wr_item_sk and web_sales.ws_order_number = web_returns.wr_order_number and web_sales.ws_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk and cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk and customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk and reason.r_reason_sk = web_returns.wr_reason_sk and ( ( cd1.cd_marital_status = 'M' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = '4 yr Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 100.00 and 150.00 ) or ( cd1.cd_marital_status = 'D' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Primary' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 50.00 and 100.00 ) or ( cd1.cd_marital_status = 'U' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Advanced Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 
150.00 and 200.00 ) ) and ( ( ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ws_net_profit between 100 and 200 ) or ( ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ws_net_profit between 150 and 300 ) or ( ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ws_net_profit between 50 and 250 ) ) group by r_reason_desc order by r, wq, ref, fee limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select substr(r_reason_desc,1,20) as r ,avg(ws_quantity) wq ,avg(wr_refunded_cash) ref ,avg(wr_fee) fee from web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason where web_sales.ws_web_page_sk = web_page.wp_web_page_sk and web_sales.ws_item_sk = web_returns.wr_item_sk and web_sales.ws_order_number = web_returns.wr_order_number and web_sales.ws_sold_date_sk = date_dim.d_date_sk and d_year = 1998 and cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk and cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk and customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk and reason.r_reason_sk = web_returns.wr_reason_sk and ( ( cd1.cd_marital_status = 'M' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = '4 yr Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 100.00 and 150.00 ) or ( cd1.cd_marital_status = 'D' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Primary' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 50.00 and 100.00 ) or ( cd1.cd_marital_status = 'U' and cd1.cd_marital_status = cd2.cd_marital_status and cd1.cd_education_status = 'Advanced Degree' and cd1.cd_education_status = cd2.cd_education_status and ws_sales_price between 150.00 and 200.00 ) ) and ( ( ca_country = 'United States' and ca_state in ('KY', 'GA', 'NM') and ws_net_profit between 100 and 200 ) or ( ca_country = 'United States' and ca_state in ('MT', 'OR', 'IN') and ws_net_profit between 150 and 300 ) or ( ca_country = 'United States' and ca_state in ('WI', 'MO', 'WV') and ws_net_profit between 50 and 250 ) ) group by r_reason_desc order by r, wq, ref, fee limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-9 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((((((ws_sales_price BETWEEN 100.0 AND 150.0 or ws_sales_price BETWEEN 50.0 AND 100.0 or ws_sales_price BETWEEN 150.0 AND 200.0) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250)) and ws_item_sk is not null) and ws_order_number is not null) and ws_web_page_sk is not null) and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + TableScan + alias: web_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((((wr_item_sk is not null and wr_order_number is not null) and wr_refunded_cdemo_sk is not null) and wr_returning_cdemo_sk is not null) and wr_refunded_addr_sk is not null) and wr_reason_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_item_sk (type: int), wr_refunded_cdemo_sk (type: int), wr_refunded_addr_sk (type: int), wr_returning_cdemo_sk (type: int), wr_reason_sk (type: int), wr_order_number (type: int), wr_fee (type: decimal(7,2)), wr_refunded_cash (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col5 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col8, _col9, _col10, _col11, _col13, _col14 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: web_page + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 2301 Data size: 1348089 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2301 Data size: 1348089 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2301 Data size: 
1348089 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col5, _col6, _col8, _col9, _col10, _col11, _col13, _col14 + Statistics: Num rows: 2531 Data size: 1482897 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 2531 Data size: 1482897 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: cd1 + Statistics: Num rows: 979800 Data size: 71797059 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U'))) and cd_demo_sk is not null) and cd_marital_status is not null) and cd_education_status is not null) (type: boolean) + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col8 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col5, _col6, _col9, _col10, _col11, _col13, _col14, _col17, _col18 + Statistics: Num rows: 134722 Data size: 9872095 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col17 = 'M') and (_col18 = '4 yr Degree') and _col5 BETWEEN 100.0 AND 150.0) or ((_col17 = 'D') and (_col18 = 'Primary') and _col5 BETWEEN 50.0 AND 100.0) or ((_col17 = 'U') and (_col18 = 'Advanced Degree') and _col5 BETWEEN 150.0 AND 200.0)) (type: boolean) + Statistics: Num rows: 50520 Data size: 3701980 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col17 (type: string), _col18 (type: string), _col4 (type: int), _col6 (type: decimal(7,2)), _col9 (type: int) + outputColumnNames: _col0, _col10, _col11, _col13, _col14, _col17, _col18, _col4, _col6, _col9 + Statistics: Num rows: 50520 Data size: 3701980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: int), _col17 (type: string), _col18 (type: string) + sort order: +++ + Map-reduce partition columns: _col10 (type: int), _col17 (type: string), _col18 (type: string) + Statistics: Num rows: 50520 Data size: 3701980 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col9 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: cd1 + Statistics: Num rows: 979800 Data size: 71797059 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U'))) and cd_demo_sk is not null) and cd_marital_status is not null) and cd_education_status is not null) (type: boolean) + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 122475 Data size: 8974632 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: int), _col17 (type: string), _col18 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col4, _col6, _col9, _col11, _col13, _col14 + Statistics: Num rows: 134722 Data size: 9872095 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 134722 Data size: 9872095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col9 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col6, _col11, _col13, _col14, _col23 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col23) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 100 AND 200) or ((_col23) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 300) or ((_col23) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col4 (type: int) + outputColumnNames: _col0, _col11, _col13, _col14, _col4 + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8250000 Data size: 8372759208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col11 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col11, _col13, _col14 + Statistics: Num rows: 9075000 Data size: 9210035328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 9075000 Data size: 9210035328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + TableScan + alias: reason + 
Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: r_reason_sk (type: int), r_reason_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36 Data size: 7200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col13, _col14, _col28 + Statistics: Num rows: 9982500 Data size: 10131039080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col28 (type: string), _col4 (type: int), _col14 (type: decimal(7,2)), _col13 (type: decimal(7,2)) + outputColumnNames: _col28, _col4, _col14, _col13 + Statistics: Num rows: 9982500 Data size: 10131039080 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col14), avg(_col13) + keys: _col28 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9982500 Data size: 10131039080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9982500 Data size: 10131039080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4991250 Data size: 5065519540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(_col0, 1, 20) (type: string), _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4991250 Data size: 5065519540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)) + sort order: ++++ + Statistics: Num rows: 4991250 Data size: 5065519540 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(11,6)), KEY.reducesinkkey3 
(type: decimal(11,6)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4991250 Data size: 5065519540 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query87.q.out b/ql/src/test/results/clientpositive/perf/query87.q.out new file mode 100644 index 0000000..954b622 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query87.q.out @@ -0,0 +1,499 @@ +PREHOOK: query: explain select count(*) from (select distinct c_last_name as l1, c_first_name as f1, d_date as d1 from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t1 LEFT OUTER JOIN ( select distinct c_last_name as l2, c_first_name as f2, d_date as d2 from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk JOIN customer ON catalog_sales.cs_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t2 ON t1.l1 = t2.l2 and t1.f1 = t2.f2 and t1.d1 = t2.d2 LEFT OUTER JOIN (select distinct c_last_name as l3, c_first_name as f3, d_date as d3 from web_sales JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t3 ON t1.l1 = t3.l3 and t1.f1 = t3.f3 and t1.d1 = t3.d3 WHERE l2 is null and l3 is null +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from (select distinct c_last_name as l1, c_first_name as f1, d_date as d1 from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t1 LEFT OUTER JOIN ( select distinct c_last_name as l2, c_first_name as f2, d_date as d2 from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk JOIN customer ON catalog_sales.cs_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t2 ON t1.l1 = t2.l2 and t1.f1 = t2.f2 and t1.d1 = t2.d2 LEFT OUTER JOIN (select distinct c_last_name as l3, c_first_name as f3, d_date as d3 from web_sales JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk where d_month_seq between 1193 and 1193+11 ) t3 ON t1.l1 = t3.l3 and t1.f1 = t3.f3 and t1.d1 = t3.d3 WHERE l2 is null and l3 is null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-11 + Stage-5 depends on stages: Stage-4, Stage-16 + Stage-6 depends on stages: Stage-5 + Stage-9 is a root stage + Stage-10 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-10 + Stage-14 is a root stage + Stage-15 depends on stages: Stage-14 + Stage-16 depends on stages: Stage-15 + Stage-0 depends on 
stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + keys: _col3 (type: string), _col6 (type: string), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 12100000 Data size: 10406244848 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12100000 Data size: 10406244848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 12100000 Data size: 10406244848 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col6 + Statistics: Num rows: 24200000 Data size: 20812489696 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col6 is null (type: boolean) + Statistics: Num rows: 12100000 Data size: 10406244848 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 12100000 Data size: 10406244848 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 (type: string), _col6 (type: string), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE 
+ Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: 
+ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col3 (type: string), _col6 (type: string), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22000000 Data size: 18920444769 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query90.q.out b/ql/src/test/results/clientpositive/perf/query90.q.out new file mode 100644 index 0000000..196309e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query90.q.out @@ -0,0 +1,395 @@ +Warning: Shuffle Join JOIN[65][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-5:MAPRED' is a cross product +PREHOOK: query: explain select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 6 and 6+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 14 and 14+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cast(amc as decimal(15,4))/cast(pmc as 
decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 6 and 6+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 14 and 14+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4, Stage-13 + Stage-6 depends on stages: Stage-5 + Stage-10 is a root stage + Stage-11 depends on stages: Stage-10 + Stage-12 depends on stages: Stage-11 + Stage-13 depends on stages: Stage-12 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null) and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + TableScan + alias: time_dim + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 23760 Data size: 11190960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 23760 Data size: 11190960 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: web_page + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 26136 Data size: 12310056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) (type: decimal(35,20)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: decimal(35,20)) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(35,20)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ws_ship_hdemo_sk is not null and ws_sold_time_sk is not null) and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_sold_time_sk (type: int), ws_ship_hdemo_sk (type: int), ws_web_page_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + TableScan + alias: time_dim + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21600 Data size: 10173600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 23760 Data size: 11190960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 23760 Data size: 11190960 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: web_page + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1151 Data size: 674337 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 26136 Data size: 12310056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query91.q.out b/ql/src/test/results/clientpositive/perf/query91.q.out new file mode 100644 index 0000000..8fafca3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query91.q.out @@ -0,0 +1,340 @@ +PREHOOK: query: explain select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and customer_address.ca_address_sk = customer.c_current_addr_sk and d_year = 1999 and d_moy = 11 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by Returns_Loss desc +PREHOOK: type: QUERY +POSTHOOK: query: explain select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and customer_address.ca_address_sk = customer.c_current_addr_sk and d_year = 1999 and d_moy = 11 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by Returns_Loss desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: 
Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: call_center + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan + alias: catalog_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((cr_call_center_sk is not null and cr_returned_date_sk is not null) and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + Statistics: Num rows: 33 Data size: 67485 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 33 Data size: 67485 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: decimal(7,2)) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_year = 1999) and (d_moy = 11)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col7 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(7,2)) + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((c_customer_sk is not null and c_current_addr_sk is not null) and c_current_cdemo_sk is not null) and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5000000 Data size: 4300100990 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col12, _col13, _col14 + Statistics: Num rows: 5500000 Data size: 4730111191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col14 (type: int) + sort order: + + Map-reduce partition columns: _col14 (type: int) + Statistics: Num rows: 5500000 Data size: 4730111191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: int) + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col14 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col12, _col13 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(7,2)), _col13 (type: int) + TableScan + alias: customer_demographics + Statistics: Num rows: 1051800 Data size: 717104259 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((cd_marital_status = 'M') or (cd_marital_status = 'W')) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree'))) and (((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree')))) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 262950 Data size: 179276064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 262950 Data size: 179276064 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 262950 Data size: 179276064 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col13, _col18, _col19 + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col13 (type: int) + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col18, _col19 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col18 (type: string), _col19 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3, _col18, _col19, _col7 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col7) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col18 (type: string), _col19 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6655000 Data size: 6754026054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6655000 Data size: 6754026054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: decimal(17,2)) + sort order: - + Statistics: Num rows: 6655000 Data size: 6754026054 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + 
Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6655000 Data size: 6754026054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6655000 Data size: 6754026054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query92.q.out b/ql/src/test/results/clientpositive/perf/query92.q.out new file mode 100644 index 0000000..1f7b8a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query92.q.out @@ -0,0 +1,240 @@ +PREHOOK: query: explain SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) as store_only, sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) as catalog_only, sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) as store_and_catalog FROM (SELECT ss.ss_customer_sk as customer_sk, ss.ss_item_sk as item_sk FROM store_sales ss JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) WHERE d1.d_month_seq >= 1206 and d1.d_month_seq <= 1217 GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, cs.cs_item_sk as item_sk FROM catalog_sales cs JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) WHERE d2.d_month_seq >= 1206 and d2.d_month_seq <= 1217 GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci ON (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) as store_only, sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) as catalog_only, sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) as store_and_catalog FROM (SELECT ss.ss_customer_sk as customer_sk, ss.ss_item_sk as item_sk FROM store_sales ss JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) WHERE d1.d_month_seq >= 1206 and d1.d_month_seq <= 1217 GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, cs.cs_item_sk as item_sk FROM catalog_sales cs JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) WHERE d2.d_month_seq >= 1206 and d2.d_month_seq <= 1217 GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci ON (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: 
NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_month_seq >= 1206) and (d_month_seq <= 1217)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2231 Data size: 2496936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2231 Data size: 2496936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2231 Data size: 2496936 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 2231 Data size: 2496936 Basic stats: COMPLETE Column stats: NONE + Reduce Operator 
Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2454 Data size: 2746629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2454 Data size: 2746629 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: cs + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: cs_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((d_month_seq >= 1206) and (d_month_seq <= 1217)) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE + Reduce 
Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4463 Data size: 4994992 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2231 Data size: 2496936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query93.q.out b/ql/src/test/results/clientpositive/perf/query93.q.out new file mode 100644 index 0000000..9e08f79 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query93.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: explain select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk and store_returns.sr_ticket_number = store_sales.ss_ticket_number) ,reason where store_returns.sr_reason_sk = reason.r_reason_sk and r_reason_desc = 'Did not like the warranty') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk and store_returns.sr_ticket_number = store_sales.ss_ticket_number) ,reason where store_returns.sr_reason_sk = reason.r_reason_sk and r_reason_desc = 'Did not like the warranty') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + 
expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + TableScan + alias: store_returns + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col2 (type: int) + outputColumnNames: _col1, _col3, _col4, _col6, _col8 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: _col6 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int) + TableScan + alias: reason + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: r_reason_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col8 + Statistics: Num rows: 19 Data size: 3960 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), CASE WHEN (_col8 is not null) THEN ((CAST( (_col3 - _col8) AS decimal(10,0)) * _col4)) ELSE ((CAST( _col3 AS decimal(10,0)) * _col4)) END (type: 
decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3960 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 19 Data size: 3960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(28,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1875 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: decimal(28,2)), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 9 Data size: 1875 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: decimal(28,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1875 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 9 Data size: 1875 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 1875 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query94.q.out b/ql/src/test/results/clientpositive/perf/query94.q.out new file mode 100644 index 0000000..679d21b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query94.q.out @@ -0,0 +1,326 @@ +PREHOOK: query: explain SELECT count(distinct ws_order_number) as order_count, sum(ws_ext_ship_cost) as total_shipping_cost, sum(ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) WHERE d.d_date between '1999-05-01' and '1999-07-01' and ca.ca_state = 'TX' and s.web_company_name = 'pri' and 
wr1.wr_order_number is null limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT count(distinct ws_order_number) as order_count, sum(ws_ext_ship_cost) as total_shipping_cost, sum(ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) WHERE d.d_date between '1999-05-01' and '1999-07-01' and ca.ca_state = 'TX' and s.web_company_name = 'pri' and wr1.wr_order_number is null limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ws_order_number is not null and ws_ship_addr_sk is not null) and ws_web_site_sk is not null) and ws_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: ca + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: s + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: web_site_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: d + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN '1999-05-01' AND '1999-07-01' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: 
int) + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: wr1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_order_number (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col12 + Statistics: Num rows: 14641000 Data size: 14858857640 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is null (type: boolean) + Statistics: Num rows: 7320500 Data size: 7429428820 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + outputColumnNames: _col3, _col4, _col5 + Statistics: Num rows: 7320500 Data size: 7429428820 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col3), sum(_col4), sum(_col5) + keys: _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7320500 Data size: 7429428820 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 7320500 Data size: 7429428820 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query95.q.out b/ql/src/test/results/clientpositive/perf/query95.q.out new file mode 100644 index 0000000..aea0ede --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query95.q.out @@ -0,0 +1,389 @@ +PREHOOK: query: explain SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON 
(ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk ) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri' +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT count(distinct ws1.ws_order_number) as order_count, sum(ws1.ws_ext_ship_cost) as total_shipping_cost, sum(ws1.ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT SEMI JOIN (SELECT wr_order_number FROM web_returns wr JOIN (SELECT ws4.ws_order_number as ws_order_number FROM web_sales ws4 JOIN web_sales ws5 ON (ws4.ws_order_number = ws5.ws_order_number) WHERE ws4.ws_warehouse_sk <> ws5.ws_warehouse_sk ) ws_wh2 ON (wr.wr_order_number = ws_wh2.ws_order_number)) tmp1 ON (ws1.ws_order_number = tmp1.wr_order_number) WHERE d.d_date between '2002-05-01' and '2002-06-30' and ca.ca_state = 'GA' and s.web_company_name = 'pri' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-1 depends on stages: Stage-6, Stage-8 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-9 is a root stage + Stage-8 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + 
key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((ws_order_number is not null and ws_ship_addr_sk is not null) and ws_web_site_sk is not null) and ws_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + Left Semi Join 0 to 2 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 0 
Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: ca + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_state = 'GA') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: s + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: web_site_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21 Data size: 38852 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12100000 Data size: 12280047105 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + TableScan + alias: d + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + 
Filter Operator + predicate: (d_date BETWEEN '2002-05-01' AND '2002-06-30' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col3), sum(_col4), sum(_col5) + keys: _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 13310000 Data size: 13508052108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + alias: ws1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ws_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output 
Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: wr + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: wr_order_number is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: wr_order_number (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query96.q.out b/ql/src/test/results/clientpositive/perf/query96.q.out new file mode 100644 index 0000000..66c9ab1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query96.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain select count(*) as c from store_sales ,household_demographics ,time_dim, store where store_sales.ss_sold_time_sk = time_dim.t_time_sk and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_store_sk = store.s_store_sk and time_dim.t_hour = 8 and time_dim.t_minute >= 30 and household_demographics.hd_dep_count = 5 and store.s_store_name = 'ese' order by c limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) as c from store_sales ,household_demographics ,time_dim, store where store_sales.ss_sold_time_sk = time_dim.t_time_sk and 
store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk and store_sales.ss_store_sk = store.s_store_sk and time_dim.t_hour = 8 and time_dim.t_minute >= 30 and household_demographics.hd_dep_count = 5 and store.s_store_name = 'ese' order by c limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_time_sk (type: int), ss_hdemo_sk (type: int), ss_store_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) + TableScan + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((hd_dep_count = 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1800 Data size: 192600 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1980 Data size: 211860 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + TableScan + alias: time_dim + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((t_hour = 8) and (t_minute >= 30)) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 3391200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7200 Data size: 3391200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 7200 Data size: 3391200 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 7920 Data size: 3730320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 7920 Data size: 3730320 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_store_name = 'ese') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 426 Data size: 814069 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 8712 Data size: 4103352 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query97.q.out b/ql/src/test/results/clientpositive/perf/query97.q.out new file mode 100644 index 0000000..beac9bd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query97.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: explain select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by ss_customer_sk ,ss_item_sk) ssci full outer join ( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by cs_bill_customer_sk ,cs_item_sk) csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by ss_customer_sk ,ss_item_sk) ssci full outer join ( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by cs_bill_customer_sk ,cs_item_sk) csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-7 + Stage-4 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) 
+ TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 
is not null and _col2 is not null)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11048 Data size: 12363283 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: cs_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10044 Data size: 11239348 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/query98.q.out b/ql/src/test/results/clientpositive/perf/query98.q.out new file mode 100644 index 0000000..f055a7c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query98.q.out @@ -0,0 +1,217 @@ +PREHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio from store_sales ,item ,date_dim where store_sales.ss_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_date between cast('2001-01-12' as date) and (cast('2001-02-11' as date)) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio from store_sales ,item ,date_dim where store_sales.ss_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_date between cast('2001-01-12' as date) and (cast('2001-02-11' as date)) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) 
+ outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 127050 Data size: 182479129 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date BETWEEN 2001-01-12 AND 2001-02-11 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 
(type: string), _col8 (type: string), _col2 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col2 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)), KEY._col3 (type: string), KEY._col4 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col3 (type: string), _col2 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: decimal(7,2), _col5: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 + partition by: _col3 + raw input shape: + window functions: + window 
function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col0 (type: string), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col0 (type: string), _col6 (type: decimal(38,23)) + sort order: +++++ + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,23)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 69877 Data size: 100362804 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/templates/TestPerfCliDriver.vm b/ql/src/test/templates/TestPerfCliDriver.vm new file mode 100644 index 0000000..2351366 --- /dev/null +++ b/ql/src/test/templates/TestPerfCliDriver.vm @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.cli; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import java.io.*; +import java.util.*; +import java.nio.*; +import java.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; + +import org.apache.hadoop.hive.ql.QTestUtil; +import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType; +import org.apache.hadoop.hive.ql.session.SessionState; + +public class $className extends TestCase { + + private static final String HIVE_ROOT = QTestUtil.ensurePathEndsInSlash(System.getProperty("hive.root")); + private static QTestUtil qt; + private static final Logger LOG = LoggerFactory.getLogger("TestPerfCliDriver"); + + public static class TestPerfCliDriverAddTestFromQFiles implements QTestUtil.SuiteAddTestFunctor { + public void addTestToSuite(TestSuite suite, Object setup, String tName) { + suite.addTest(new $className("testPerfCliDriver_"+tName)); + } + } + + static { + + MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode"); + String hiveConfDir = "$hiveConfDir"; + String initScript = "$initScript"; + String cleanupScript = "$cleanupScript"; + try { + String hadoopVer = "$hadoopVersion"; + if (!hiveConfDir.isEmpty()) { + hiveConfDir = HIVE_ROOT + hiveConfDir; + } + qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, + hiveConfDir, hadoopVer, initScript, cleanupScript); + + // do a one time initialization + qt.cleanUp(); + qt.createSources(); + setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(qt.getConf()); + } catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in static initialization: "+e.getMessage()); + } + } + + public $className(String name) { + super(name); + } + + @Override + protected void setUp() { + try { + qt.clearTestSideEffects(); + } catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in setup"); + } + } + + /** + * Dummy last test. 
This is only meant to shutdown qt + */ + public void testPerfCliDriver_shutdown() { + System.err.println ("Cleaning up " + "$className"); + } + + @Override + protected void tearDown() { + try { + qt.clearPostTestEffects(); + if (getName().equals("testPerfCliDriver_shutdown")) + qt.shutdown(); + } + catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in tearDown"); + } + } + + public static Test suite() { + Set qFilesToExecute = new HashSet(); + String qFiles = System.getProperty("qfile", "").trim(); + if(!qFiles.isEmpty()) { + for(String qFile : qFiles.split(",")) { + qFile = qFile.trim(); + if(!qFile.isEmpty()) { + qFilesToExecute.add(qFile); + } + } + } + + TestSuite suite = new TestSuite(); + + QTestUtil.addTestsToSuiteFromQfileNames("$qFileNamesFile", qFilesToExecute, + suite, null, new TestPerfCliDriverAddTestFromQFiles()); + suite.addTest(new $className("testPerfCliDriver_shutdown")); + return suite; + } + + static String debugHint = "\nSee ./ql/target/tmp/log/hive.log or ./itests/qtest/target/tmp/log/hive.log, " + + "or check ./ql/target/surefire-reports or ./itests/qtest/target/surefire-reports/ for specific test cases logs."; + +#foreach ($qf in $qfiles) + #set ($fname = $qf.getName()) + #set ($eidx = $fname.indexOf('.')) + #set ($tname = $fname.substring(0, $eidx)) + #set ($fpath = $qfilesMap.get($fname)) + public void testPerfCliDriver_$tname() throws Exception { + runTest("$tname", "$fname", (HIVE_ROOT + "$fpath")); + } + +#end + + private void runTest(String tname, String fname, String fpath) throws Exception { + long startTime = System.currentTimeMillis(); + try { + System.err.println("Begin query: " + fname); + + qt.addFile(fpath); + + if (qt.shouldBeSkipped(fname)) { + return; + } + + qt.cliInit(fname, true); + setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(qt.getConf()); + + int ecode = qt.executeClient(fname); + if (ecode != 0) { + qt.failed(ecode, fname, debugHint); + } + ecode = qt.checkCliDriverResults(fname); + if (ecode != 0) { + qt.failedDiff(ecode, fname, debugHint); + } + } + catch (Throwable e) { + qt.failed(e, fname, debugHint); + } + + long elapsedTime = System.currentTimeMillis() - startTime; + System.err.println("Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s"); + assertTrue("Test passed", true); + } + + private static void setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(HiveConf conf) { + Connection conn = null; + ArrayList statements = new ArrayList(); // list of Statements, PreparedStatements + + try { + Properties props = new Properties(); // connection properties + props.put("user", conf.get("javax.jdo.option.ConnectionUserName")); + props.put("password", conf.get("javax.jdo.option.ConnectionPassword")); + conn = DriverManager.getConnection(conf.get("javax.jdo.option.ConnectionURL"), props); + + if (LOG.isDebugEnabled()) { + LOG.debug("Connected to metastore database "); + } + + String mdbPath = "../../data/files/tpcds-perf/metastore_export/"; + // Setup the table column stats + BufferedReader br = new BufferedReader(new FileReader(new File(mdbPath+"sql/TAB_COL_STATS.sql"))); + String command; + + // Create the column stats table + while ((command = br.readLine()) != null) { + if (!command.endsWith(";")) { + continue; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Going to run command : " + command); + } + try { + PreparedStatement psCommand = conn.prepareStatement(command.substring(0, command.length()-1)); + 
statements.add(psCommand); + psCommand.execute(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + command); + } + } catch (SQLException e) { + LOG.info("Got SQL Exception " + e.getMessage()); + } + } + br.close(); + + File tabColStatsCsv = new File(mdbPath+"csv/TAB_COL_STATS.txt"); + File tabParamsCsv = new File(mdbPath+"csv/TABLE_PARAMS.txt"); + + // Set up the foreign key constraints properly in the TAB_COL_STATS data + String tmpBaseDir = System.getProperty("test.tmp.dir"); + File tmpFileLoc1 = new File(tmpBaseDir+"/TAB_COL_STATS.txt"); + File tmpFileLoc2 = new File(tmpBaseDir+"/TABLE_PARAMS.txt"); + FileUtils.copyFile(tabColStatsCsv, tmpFileLoc1); + FileUtils.copyFile(tabParamsCsv, tmpFileLoc2); + + ResultSet rs = null; + Statement s = conn.createStatement(); + class MyComp implements Comparator { + @Override + public int compare(String str1, String str2) { + if (str2.length() != str1.length()) { + return str2.length() - str1.length(); + } + return str1.compareTo(str2); + } + } + SortedMap tableNameToID = new TreeMap(new MyComp()); + + rs = s.executeQuery("SELECT * FROM APP.TBLS"); + while(rs.next()) { + String tblName = rs.getString("TBL_NAME"); + Integer tblId = rs.getInt("TBL_ID"); + tableNameToID.put(tblName, tblId); + + if (LOG.isDebugEnabled()) { + LOG.debug("Resultset : " + tblName + " | " + tblId); + } + } + for (Map.Entry entry : tableNameToID.entrySet()) { + String toReplace = "_" + entry.getKey() + "_" ; + String replacementString = ""+entry.getValue(); + try { + String content1 = FileUtils.readFileToString(tmpFileLoc1, "UTF-8"); + content1 = content1.replaceAll(toReplace, replacementString); + FileUtils.writeStringToFile(tmpFileLoc1, content1, "UTF-8"); + String content2 = FileUtils.readFileToString(tmpFileLoc2, "UTF-8"); + content2 = content2.replaceAll(toReplace, replacementString); + FileUtils.writeStringToFile(tmpFileLoc2, content2, "UTF-8"); + } catch (IOException e) { + //Simple exception handling, replace with what's necessary for your use case! 
+ LOG.info("Generating file failed", e); + } + } + + // Load the column stats and table params with 30 TB scale + String importStatement1 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE_LOBS_FROM_EXTFILE(null, '" + "TAB_COL_STATS" + + "', '" + tmpFileLoc1.getAbsolutePath() + + "', ',', null, 'UTF-8', 1)"; + String importStatement2 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE_LOBS_FROM_EXTFILE(null, '" + "TABLE_PARAMS" + + "', '" + tmpFileLoc2.getAbsolutePath() + + "', ',', null, 'UTF-8', 1)"; + try { + PreparedStatement psImport1 = conn.prepareStatement(importStatement1); + if (LOG.isDebugEnabled()) { + LOG.debug("Going to execute : " + importStatement1); + } + statements.add(psImport1); + psImport1.execute(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + importStatement1); + } + PreparedStatement psImport2 = conn.prepareStatement(importStatement2); + if (LOG.isDebugEnabled()) { + LOG.debug("Going to execute : " + importStatement2); + } + statements.add(psImport2); + psImport2.execute(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + importStatement2); + } + } catch (SQLException e) { + LOG.info("Got SQL Exception " + e.getMessage()); + } + } catch (FileNotFoundException e1) { + LOG.info("Got File not found Exception " + e1.getMessage()); + } catch (IOException e1) { + LOG.info("Got IOException " + e1.getMessage()); + } catch (SQLException e1) { + LOG.info("Got SQLException " + e1.getMessage()); + } finally { + // Statements and PreparedStatements + int i = 0; + while (!statements.isEmpty()) { + // PreparedStatement extend Statement + Statement st = (Statement)statements.remove(i); + try { + if (st != null) { + st.close(); + st = null; + } + } catch (SQLException sqle) { + } + } + + //Connection + try { + if (conn != null) { + conn.close(); + conn = null; + } + } catch (SQLException sqle) { + } + } + } +}