E.g Q81: create materialized view customer_customer_address as select * from customer c, customer_address ca where c.c_current_addr_sk=ca.ca_address_sk; -- Query does not make use of customer_customer_address explain cbo with customer_total_return as (select cr_returning_customer_sk as ctr_customer_sk ,ca_state as ctr_state, sum(cr_return_amt_inc_tax) as ctr_total_return from catalog_returns ,date_dim ,customer_address where cr_returned_date_sk = d_date_sk and d_year =1998 and cr_returning_addr_sk = ca_address_sk group by cr_returning_customer_sk ,ca_state ) select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset ,ca_location_type,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 from customer_total_return ctr2 where ctr1.ctr_state = ctr2.ctr_state) and ca_address_sk = c_current_addr_sk and ca_state = 'TX' and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset ,ca_location_type,ctr_total_return limit 100 INFO : Starting task [Stage-3:EXPLAIN] in serial mode INFO : Completed executing command(queryId=rbalamohan_20200717063234_feb6c1ae-c0fa-4bb0-8b01-d4bd5316cac8); Time taken: 0.012 seconds INFO : OK Explain CBO PLAN: HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_name=[$3], ca_street_number=[$4], ca_street_name=[$5], ca_street_type=[$6], ca_suite_number=[$7], ca_city=[$8], ca_county=[$9], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"], ca_zip=[$10], ca_country=[$11], ca_gmt_offset=[$12], ca_location_type=[$13], ctr_total_return=[$14]) HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5=[$5], sort6=[$6], sort7=[$7], sort8=[$8], sort9=[$9], sort10=[$10], sort11=[$11], sort12=[$12], sort13=[$13], sort14=[$14], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], dir8=[ASC], dir9=[ASC], dir10=[ASC], dir11=[ASC], dir12=[ASC], dir13=[ASC], dir14=[ASC], fetch=[100]) HiveProject(c_customer_id=[$1], c_salutation=[$3], c_first_name=[$4], c_last_name=[$5], ca_street_number=[$7], ca_street_name=[$8], ca_street_type=[$9], ca_suite_number=[$10], ca_city=[$11], ca_county=[$12], ca_zip=[$13], ca_country=[$14], ca_gmt_offset=[$15], ca_location_type=[$16], ctr_total_return=[$19]) HiveJoin(condition=[AND(=($18, $21), >($19, $20))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_current_addr_sk=[$4], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12]) HiveFilter(condition=[=($8, _UTF-16LE'TX')]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, customer_address]], table:alias=[customer_address]) HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returning_customer_sk=[$6], cr_returning_addr_sk=[$9], cr_return_amt_inc_tax=[$19], cr_returned_date_sk=[$26]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($6), IS NOT NULL($26))]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, catalog_returns]], table:alias=[catalog_returns]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 1998)]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(/($1, $2), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) HiveProject(ca_state=[$0], cr_returning_customer_sk=[$1], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returning_customer_sk=[$6], cr_returning_addr_sk=[$9], cr_return_amt_inc_tax=[$19], cr_returned_date_sk=[$26]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($26))]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, catalog_returns]], table:alias=[catalog_returns]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 1998)]) HiveTableScan(table=[[tpcds_bin_partitioned_orc_10000, date_dim]], table:alias=[date_dim])