diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 9aa30129b6..f9b14651a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -64,6 +64,10 @@ import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.slf4j.Logger; @@ -762,4 +766,85 @@ protected static EquivalenceClasses copy(EquivalenceClasses ec) { return newEc; } } + + // Given a RelNode and a column set, this tries to answer whether the cardinality of the column set could have + // changed. 
+ // This is currently used to remove grouping columns from a group by if the cardinality hasn't changed for the + // columns and they consist of a unique key/pk. Result: the columns as table scan positions, or an empty set if untraceable. + public static class CardinalityChange { + + public static ImmutableBitSet isCardinalitySameAsSource(RelNode rel, ImmutableBitSet colSet) { + if (rel instanceof HiveFilter) { + return isCardinalitySameAsSource((HiveFilter) rel, colSet); + } else if (rel instanceof HiveProject) { + return isCardinalitySameAsSource((HiveProject) rel, colSet); + } else if (rel instanceof HiveJoin) { + return isCardinalitySameAsSource((HiveJoin)rel, colSet); + } else if (rel instanceof HiveTableScan) { + return isCardinalitySameAsSource((HiveTableScan)rel, colSet); + } else { + return ImmutableBitSet.of(); + } + } + + private static ImmutableBitSet isCardinalitySameAsSource(HiveJoin join, ImmutableBitSet colSet) { + final ImmutableBitSet leftBits = + ImmutableBitSet.range(join.getLeft().getRowType().getFieldCount()); + final ImmutableBitSet rightBits = + ImmutableBitSet.range(join.getLeft().getRowType().getFieldCount(), + join.getRight().getRowType().getFieldCount() + + join.getLeft().getRowType().getFieldCount()); + if(leftBits.contains(colSet)) { + if(join.getJoinType().generatesNullsOnLeft()) { + return ImmutableBitSet.of(); + } + return isCardinalitySameAsSource(join.getLeft(), colSet); + } else if(rightBits.contains(colSet)) { + if(join.getJoinType().generatesNullsOnRight()) { + return ImmutableBitSet.of(); + } + // colSet holds join output positions; shift by the left field count to get right input positions + ImmutableBitSet.Builder bitBuilder = ImmutableBitSet.builder(); + int leftCount = join.getLeft().getRowType().getFieldCount(); + for(int i:colSet) { + bitBuilder.set(i - leftCount); + } + return isCardinalitySameAsSource(join.getRight(), bitBuilder.build()); + } else { + return ImmutableBitSet.of(); + } + } + + private static ImmutableBitSet isCardinalitySameAsSource(HiveFilter filter, ImmutableBitSet colSet) { + return isCardinalitySameAsSource(filter.getInput(), colSet); + } + 
+ private static ImmutableBitSet isCardinalitySameAsSource(HiveProject project, ImmutableBitSet colSet) { + final Map mapInToOutPos = new HashMap<>(); + final List projExprs = project.getProjects(); + + ImmutableBitSet newColSet = colSet; + ImmutableBitSet.Builder inputColSet = ImmutableBitSet.builder(); + + // For each requested output column that is a plain input reference, record the input position it reads. + for (int i = 0; i < projExprs.size(); i++) { + RexNode projExpr = projExprs.get(i); + if (projExpr instanceof RexInputRef) { + if(colSet.contains(ImmutableBitSet.of(i))) { + newColSet = newColSet.clear(i); + inputColSet.set(((RexInputRef) projExpr).getIndex()); + } + } + } + + if (newColSet.isEmpty()) { + return isCardinalitySameAsSource(project.getInput(), inputColSet.build()); + } + return ImmutableBitSet.of(); + } + + private static ImmutableBitSet isCardinalitySameAsSource(HiveTableScan tableScan, ImmutableBitSet colSet) { + return colSet; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index b7c31bdfca..9e23111fb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -36,6 +36,7 @@ import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelColumnOrigin; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; @@ -59,6 +60,7 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; import 
org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; @@ -312,6 +314,58 @@ private boolean isRexLiteral(final RexNode rexNode) { } } + + // Given a group set, this tries to find out whether the cardinality of the grouping columns could have changed, + // because if not and it consists of keys (unique + not null OR pk), we can safely remove the rest of the columns + // if those columns are not being used further up + private ImmutableBitSet generateGroupSetIfCardinalitySame(final Aggregate aggregate, + final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) { + RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery(); + + ImmutableBitSet backtrackedGBSet = HiveRelOptUtil.CardinalityChange. + isCardinalitySameAsSource(aggregate.getInput(), originalGroupSet); + if(backtrackedGBSet == null || backtrackedGBSet.isEmpty()) { + return originalGroupSet; + } + // we are able to backtrack the grouping columns i.e. 
cardinality hasn't changed + // now we need to get to the source table to get to the keys + // since we are able to backtrack the group set, use one of its columns to get to the table + RelColumnOrigin columnOrigin = mq.getColumnOrigin(aggregate.getInput(), originalGroupSet.iterator().next()); + assert(columnOrigin != null); // if we were able to backtrack the original group set, we should be able to get to the table (only checked with -ea) + RelOptHiveTable tbl = (RelOptHiveTable)columnOrigin.getOriginTable(); + + List allKeys = tbl.getNonNullableKeys(); + ImmutableBitSet currentKey = null; + for(ImmutableBitSet key:allKeys) { + if(backtrackedGBSet.contains(key)) { + // only if the backtracked grouping columns contain every column of this key + currentKey = key; + break; + } + } + if(currentKey == null || currentKey.isEmpty()) { + return originalGroupSet; + } + + // we want to delete all columns in the original GB set except the key + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + + List backtrackedGBList = backtrackedGBSet.asList(); + List originalGBList = originalGroupSet.asList(); + List keysList = currentKey.asList(); + + // we have established that this gb set contains keys and it is safe to remove the rest of the columns + for(int i=0; i uniqueKeys = mq.getUniqueKeys(input, false); if (uniqueKeys == null || uniqueKeys.isEmpty()) { - return originalGroupSet; + return generateGroupSetIfCardinalitySame(aggregate, originalGroupSet, fieldsUsed); } // we have set of unique key, get to the key which is same as group by key diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q index 70ab8509c5..5420343a3d 100644 --- a/ql/src/test/queries/clientpositive/constraints_optimization.q +++ b/ql/src/test/queries/clientpositive/constraints_optimization.q @@ -147,3 +147,210 @@ explain select key1 from dest_g24 group by key1, value1; DROP TABLE dest_g21; DROP TABLE dest_g24; + +CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + 
`c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string); + + CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)); + + alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely; + alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely; + alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely; + + alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely; + alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely; + + explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address; + + explain cbo + select c_customer_id + from store_sales + ,customer + where 
c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address; + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + 
`d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string); + + explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + + diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 96caa4d6dd..ddfdaa5f67 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2008,3 +2008,535 @@ POSTHOOK: query: DROP TABLE dest_g24 POSTHOOK: type: DROPTABLE 
POSTHOOK: Input: default@dest_g24 POSTHOOK: Output: default@dest_g24 +PREHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer +POSTHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer +PREHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` 
decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@customer +PREHOOK: Output: default@customer +POSTHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@customer +POSTHOOK: Output: default@customer +PREHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +POSTHOOK: type: 
ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{2}]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + 
,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], 
fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + 
`d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = 
ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], 
agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +