diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 9aa30129b6..a388d09513 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -7,7 +7,7 @@
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -31,6 +31,7 @@
 import java.util.Map.Entry;
 import java.util.Set;
 import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelReferentialConstraint;
@@ -44,6 +45,7 @@
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
@@ -762,4 +764,44 @@ protected static EquivalenceClasses copy(EquivalenceClasses ec) {
       return newEc;
     }
   }
+
+  /**
+   * Backtracks every column of {@code colSet} to the base-table column it is copied from.
+   *
+   * A result is produced only when each column has exactly one origin that is not derived
+   * (i.e. the value is passed through unchanged, not computed by an expression) and all of
+   * those origins belong to the same table. Anything else cannot be mapped position by
+   * position onto the table's key columns, so {@code null} is returned instead.
+   *
+   * @param rel    relational expression whose output columns are being traced
+   * @param colSet ordinals of the output columns of {@code rel} to trace
+   * @return the origin table paired with the origin column ordinals, listed in the iteration
+   *         order of {@code colSet} (one entry per column), or {@code null} if {@code colSet}
+   *         is empty or the columns cannot all be traced to a single table
+   */
+  public static Pair<RelOptTable, List<Integer>> getColumnOriginSet(RelNode rel, ImmutableBitSet colSet) {
+    if (colSet.isEmpty()) {
+      return null;
+    }
+    RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+    RelOptTable originTable = null;
+    List<Integer> originColumns = new ArrayList<>(colSet.cardinality());
+    for (int col : colSet) {
+      // getColumnOrigin yields null unless the column has a single, non-derived origin;
+      // multi-origin (e.g. UNION) or computed columns do not preserve the table's keys
+      RelColumnOrigin origin = mq.getColumnOrigin(rel, col);
+      if (origin == null) {
+        return null;
+      }
+      RelOptTable tbl = origin.getOriginTable();
+      if (originTable == null) {
+        originTable = tbl;
+      } else if (!originTable.equals(tbl)) {
+        // columns come from more than one table, no single table covers the whole set
+        return null;
+      }
+      originColumns.add(origin.getOriginColumnOrdinal());
+    }
+    return Pair.of(originTable, originColumns);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index b7c31bdfca..b8c32ddc35 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -53,12 +53,14 @@
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.mapping.IntPair;
 import org.apache.calcite.util.mapping.Mapping;
 import org.apache.calcite.util.mapping.MappingType;
 import org.apache.calcite.util.mapping.Mappings;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
@@ -312,6 +314,54 @@ private boolean isRexLiteral(final RexNode rexNode) {
     }
   }
+
+  // Given a groupset this tries to find
out if the cardinality of the grouping columns could have changed + // because if not and it consist of keys (unique + not null OR pk), we can safely remove rest of the columns + // if those are columns are not being used further up + private ImmutableBitSet generateGroupSetIfCardinalitySame(final Aggregate aggregate, + final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) { + RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery(); + + Pair> tabToOrgCol = HiveRelOptUtil.getColumnOriginSet(aggregate.getInput(), + originalGroupSet); + if(tabToOrgCol == null) { + return originalGroupSet; + } + RelOptHiveTable tbl = (RelOptHiveTable)tabToOrgCol.left; + List backtrackedGBList = tabToOrgCol.right; + ImmutableBitSet backtrackedGBSet = ImmutableBitSet.builder().addAll(backtrackedGBList).build(); + + List allKeys = tbl.getNonNullableKeys(); + ImmutableBitSet currentKey = null; + for(ImmutableBitSet key:allKeys) { + if(backtrackedGBSet.contains(key)) { + // only if grouping sets consist of keys + currentKey = key; + break; + } + } + if(currentKey == null || currentKey.isEmpty()) { + return originalGroupSet; + } + + // we want to delete all columns in original GB set except the key + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + + List originalGBList = originalGroupSet.asList(); + List keysList = currentKey.asList(); + + // we have established that this gb set contains keys and it is safe to remove rest of the columns + for(int i=0; i uniqueKeys = mq.getUniqueKeys(input, false); if (uniqueKeys == null || uniqueKeys.isEmpty()) { - return originalGroupSet; + return generateGroupSetIfCardinalitySame(aggregate, originalGroupSet, fieldsUsed); } // we have set of unique key, get to the key which is same as group by key diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q index 70ab8509c5..5420343a3d 100644 --- 
a/ql/src/test/queries/clientpositive/constraints_optimization.q +++ b/ql/src/test/queries/clientpositive/constraints_optimization.q @@ -147,3 +147,210 @@ explain select key1 from dest_g24 group by key1, value1; DROP TABLE dest_g21; DROP TABLE dest_g24; + +CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string); + + CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)); + + alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely; + alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely; + alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely; + + alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely; + alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable 
novalidate rely; + + explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address; + + explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address; + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + 
from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string); + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + + diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out 
b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 96caa4d6dd..ddfdaa5f67 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2008,3 +2008,535 @@ POSTHOOK: query: DROP TABLE dest_g24 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_g24 POSTHOOK: Output: default@dest_g24 +PREHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer +POSTHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer +PREHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` 
decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@customer +PREHOOK: Output: default@customer +POSTHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL 
+POSTHOOK: Input: default@customer +POSTHOOK: Output: default@customer +PREHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{2}]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + 
,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], 
c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag 
+ from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + 
`d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + 
,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +