diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 9aa30129b6..a6c97084c1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -63,6 +63,10 @@
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Pair;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -762,4 +766,100 @@ protected static EquivalenceClasses copy(EquivalenceClasses ec) {
       return newEc;
     }
   }
+
+  /**
+   * Helpers that trace output columns of an operator back to the table scan producing them.
+   */
+  public static class CardinalityChange {
+
+    /**
+     * Backtracks {@code colSet} through {@code rel} towards a table scan.
+     *
+     * <p>Only {@link HiveFilter}, {@link HiveProject}, {@link HiveJoin} and
+     * {@link HiveTableScan} are traversed; any other operator stops the backtracking.
+     *
+     * @param rel operator whose output columns are referenced by {@code colSet}
+     * @param colSet column positions in the row type of {@code rel}
+     * @return the positions of {@code colSet} on the table scan that was reached, or an empty
+     *         set if the columns cannot be traced back to a single scan
+     */
+    public static ImmutableBitSet isCardinalitySameAsSource(RelNode rel, ImmutableBitSet colSet) {
+      if (rel instanceof HiveFilter) {
+        return isCardinalitySameAsSource((HiveFilter) rel, colSet);
+      }
+      if (rel instanceof HiveProject) {
+        return isCardinalitySameAsSource((HiveProject) rel, colSet);
+      }
+      if (rel instanceof HiveJoin) {
+        return isCardinalitySameAsSource((HiveJoin) rel, colSet);
+      }
+      if (rel instanceof HiveTableScan) {
+        return isCardinalitySameAsSource((HiveTableScan) rel, colSet);
+      }
+      return ImmutableBitSet.of();
+    }
+
+    /**
+     * Backtracks through a join. Every column must come from the same input, and that input
+     * must not be on a side for which the join type generates nulls; otherwise an empty set
+     * is returned.
+     */
+    private static ImmutableBitSet isCardinalitySameAsSource(HiveJoin join,
+        ImmutableBitSet colSet) {
+      final int leftCount = join.getLeft().getRowType().getFieldCount();
+      final int rightCount = join.getRight().getRowType().getFieldCount();
+      final ImmutableBitSet leftBits = ImmutableBitSet.range(leftCount);
+      final ImmutableBitSet rightBits = ImmutableBitSet.range(leftCount, leftCount + rightCount);
+      if (leftBits.contains(colSet)) {
+        if (join.getJoinType().generatesNullsOnLeft()) {
+          return ImmutableBitSet.of();
+        }
+        return isCardinalitySameAsSource(join.getLeft(), colSet);
+      }
+      if (rightBits.contains(colSet)) {
+        if (join.getJoinType().generatesNullsOnRight()) {
+          return ImmutableBitSet.of();
+        }
+        // Right-side columns sit after the left fields in the join's row type; shift them
+        // back by the left field count so they index into the right input's row type.
+        final ImmutableBitSet.Builder rightColSet = ImmutableBitSet.builder();
+        for (int i : colSet) {
+          rightColSet.set(i - leftCount);
+        }
+        return isCardinalitySameAsSource(join.getRight(), rightColSet.build());
+      }
+      return ImmutableBitSet.of();
+    }
+
+    /** Backtracks through a filter by passing the same positions on to its input. */
+    private static ImmutableBitSet isCardinalitySameAsSource(HiveFilter filter,
+        ImmutableBitSet colSet) {
+      return isCardinalitySameAsSource(filter.getInput(), colSet);
+    }
+
+    /**
+     * Backtracks through a project. Every requested output position must be a plain input
+     * reference; if any of them is not, an empty set is returned.
+     */
+    private static ImmutableBitSet isCardinalitySameAsSource(HiveProject project,
+        ImmutableBitSet colSet) {
+      final List<RexNode> projExprs = project.getProjects();
+      final ImmutableBitSet.Builder inputColSet = ImmutableBitSet.builder();
+      for (int outPos : colSet) {
+        if (outPos >= projExprs.size() || !(projExprs.get(outPos) instanceof RexInputRef)) {
+          return ImmutableBitSet.of();
+        }
+        inputColSet.set(((RexInputRef) projExprs.get(outPos)).getIndex());
+      }
+      return isCardinalitySameAsSource(project.getInput(), inputColSet.build());
+    }
+
+    /** A table scan is where backtracking ends: the requested positions are returned as is. */
+    private static ImmutableBitSet isCardinalitySameAsSource(HiveTableScan tableScan,
+        ImmutableBitSet colSet) {
+      // NOTE(review): assumes positions in the scan's row type match the table's column
+      // positions - confirm for scans whose row type has been narrowed.
+      return colSet;
+    }
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index b7c31bdfca..5e73210169 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -36,6 +36,7 @@
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
@@ -57,8 +58,10 @@
 import org.apache.calcite.util.mapping.Mapping;
 import org.apache.calcite.util.mapping.MappingType;
 import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; @@ -312,11 +315,14 @@ private boolean isRexLiteral(final RexNode rexNode) { } } + // if the gby keys contain a pk/uk, the non-pk/non-uk columns are removed if they are not being used private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) { ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + //TODO: add a check to see if the group by set contains fields which are unused before making the call to metadata + if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0 || fieldsUsed.contains(originalGroupSet)) { // if there is grouping sets, indicator or all the group keys are being used we do no need to proceed further @@ -328,6 +334,50 @@ private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet final Set uniqueKeys = mq.getUniqueKeys(input, false); if (uniqueKeys == null || uniqueKeys.isEmpty()) { + ImmutableBitSet backtrackedGBSet = HiveRelOptUtil.CardinalityChange. 
+ isCardinalitySameAsSource(aggregate.getInput(), originalGroupSet); + if(backtrackedGBSet.isEmpty()) { + return originalGroupSet; + } + // since we are able to backtrack the group set, use one of its column to get to the table + RelColumnOrigin columnOrigin = mq.getColumnOrigin(aggregate.getInput(), originalGroupSet.iterator().next()); + assert(columnOrigin != null); // if we were able to backtrack original group set, we should be able to get to table + RelOptHiveTable tbl = (RelOptHiveTable)columnOrigin.getOriginTable(); + + List allKeys = tbl.getNonNullableKeys(); + ImmutableBitSet currentKey = null; + for(ImmutableBitSet key:allKeys) { + if(backtrackedGBSet.contains(key)) { + currentKey = key; + break; + } + } + if(currentKey == null || currentKey.isEmpty()) { + return originalGroupSet; + } + + // we want to delete all columns in original GB set except the key + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + + List backtrackedGBList = backtrackedGBSet.asList(); + List originalGBList = originalGroupSet.asList(); + List keysList = currentKey.asList(); + + // we have established that this gb set contains keys and it is safe to remove rest of the columns + for(int i=0; i 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100; + + + diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 96caa4d6dd..ddfdaa5f67 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2008,3 +2008,535 @@ POSTHOOK: query: DROP TABLE dest_g24 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest_g24 POSTHOOK: Output: default@dest_g24 +PREHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + 
`c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer +POSTHOOK: query: CREATE TABLE `customer`( + `c_customer_sk` int, + `c_customer_id` string, + `c_current_cdemo_sk` int, + `c_current_hdemo_sk` int, + `c_current_addr_sk` int, + `c_first_shipto_date_sk` int, + `c_first_sales_date_sk` int, + `c_salutation` string, + `c_first_name` string, + `c_last_name` string, + `c_preferred_cust_flag` string, + `c_birth_day` int, + `c_birth_month` int, + `c_birth_year` int, + `c_birth_country` string, + `c_login` string, + `c_email_address` string, + `c_last_review_date` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer +PREHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + `ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: CREATE TABLE `store_sales`( + `ss_sold_date_sk` int, + `ss_sold_time_sk` int, + `ss_item_sk` int, + `ss_customer_sk` int, + `ss_cdemo_sk` int, + 
`ss_hdemo_sk` int, + `ss_addr_sk` int, + `ss_store_sk` int, + `ss_promo_sk` int, + `ss_ticket_number` int, + `ss_quantity` int, + `ss_wholesale_cost` decimal(7,2), + `ss_list_price` decimal(7,2), + `ss_sales_price` decimal(7,2), + `ss_ext_discount_amt` decimal(7,2), + `ss_ext_sales_price` decimal(7,2), + `ss_ext_wholesale_cost` decimal(7,2), + `ss_ext_list_price` decimal(7,2), + `ss_ext_tax` decimal(7,2), + `ss_coupon_amt` decimal(7,2), + `ss_net_paid` decimal(7,2), + `ss_net_paid_inc_tax` decimal(7,2), + `ss_net_profit` decimal(7,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint pk_c primary key (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@customer +PREHOOK: Output: default@customer +POSTHOOK: query: alter table customer change column c_customer_id c_customer_id string constraint cid_nn not null disable novalidate rely +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@customer +POSTHOOK: Output: default@customer +PREHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table customer add constraint uk1 UNIQUE(c_customer_id) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, ss_ticket_number) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint pk_ss primary key (ss_item_sk, 
ss_ticket_number) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_sales add constraint ss_c foreign key (ss_customer_sk) references customer (c_customer_sk) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + 
,c_login + ,c_email_address +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select c_customer_id + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{2}]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales 
+#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address 
customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from store_sales + ,customer + where c_customer_sk = ss_customer_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ) + select t_s_secyear.customer_preferred_cust_flag + from + year_total t_s_secyear + where t_s_secyear.sale_type = 's' + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], 
c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + +PREHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: CREATE TABLE `date_dim`( + `d_date_sk` int, + `d_date_id` string, + `d_date` string, + `d_month_seq` int, + `d_week_seq` int, + `d_quarter_seq` int, + `d_year` int, + `d_dow` int, + `d_moy` int, + `d_dom` int, + `d_qoy` int, + `d_fy_year` int, + `d_fy_quarter_seq` int, + `d_fy_week_seq` int, + `d_day_name` string, + `d_quarter_name` string, + `d_holiday` string, + `d_weekend` string, + `d_following_holiday` string, + `d_first_dom` int, + `d_last_dom` int, + `d_same_day_ly` int, + `d_same_day_lq` int, + `d_current_day` string, + `d_current_week` string, + `d_current_month` string, + `d_current_quarter` string, + `d_current_year` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + 
,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + where 
t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_s_secyear.sale_type = 's' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + order by t_s_secyear.customer_preferred_cust_flag + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(customer_preferred_cust_flag=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$0], c_preferred_cust_flag=[$1]) + HiveAggregate(group=[{1, 4}]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 0)]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + 
HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), CAST(2):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +