diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index a13ad28313..c71f1357f0 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -907,7 +907,8 @@ minillaplocal.query.files=\ dynamic_semijoin_reduction_sw2.q,\ partialdhj.q,\ stats_date.q,\ - dst.q + dst.q,\ + q93_with_constraints.sql encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_insert_partition_static.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index 8214cc9a0d..e684432606 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -27,6 +27,21 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule { + public static final HiveJoinProjectTransposeRule LEFF_PROJECT_BTW_JOIN = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(HiveProject.class, operand(HiveJoin.class, any())), + operand(RelNode.class, any())), + "JoinProjectTransposeRule(Project-Join-Other)", + false, HiveRelFactories.HIVE_BUILDER); + public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_BTW_JOIN = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(RelNode.class, any()), + operand(HiveProject.class, operand(HiveJoin.class, any()))), + "JoinProjectTransposeRule(Other-Project-Join)", + false, HiveRelFactories.HIVE_BUILDER); + public static final HiveJoinProjectTransposeRule BOTH_PROJECT = new HiveJoinProjectTransposeRule( operand(HiveJoin.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index e6e033066d..d74ee24f4a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1795,7 +1795,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calcitePreCboPlan, mdProvider.getMetadataProvider(), executorProvider); } - // 4. Apply join order optimizations: reordering MST algorithm + // 4.1 Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin. + calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN); + + // 4.2 Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { @@ -2144,6 +2149,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Rerun PPD"); + return basePlan; } diff --git a/ql/src/test/queries/clientpositive/q93_with_constraints.q b/ql/src/test/queries/clientpositive/q93_with_constraints.q new file mode 100644 index 0000000000..2d46bccb8d --- /dev/null +++ b/ql/src/test/queries/clientpositive/q93_with_constraints.q @@ -0,0 +1,88 @@ +drop table if exists store_sales; +create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table store_sales update statistics set ('numRows'='575995635'); + +drop table if exists store_returns; +create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table store_returns update statistics set ('numRows'='57591150'); + +drop table if exists reason; +create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB"); +alter table reason update statistics set ('numRows'='72'); + +alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely; + +explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100; diff --git a/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out new file mode 100644 index 0000000000..8c2eb2224f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out @@ -0,0 +1,373 @@ +PREHOOK: query: drop table if exists store_sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists store_sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create external table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost decimal(7,2), + ss_list_price decimal(7,2), + ss_sales_price decimal(7,2), + ss_ext_discount_amt decimal(7,2), + ss_ext_sales_price decimal(7,2), + ss_ext_wholesale_cost decimal(7,2), + ss_ext_list_price decimal(7,2), + ss_ext_tax decimal(7,2), + ss_coupon_amt decimal(7,2), + ss_net_paid decimal(7,2), + ss_net_paid_inc_tax decimal(7,2), + ss_net_profit decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: alter table store_sales update statistics set ('numRows'='575995635') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: alter table store_sales update statistics set ('numRows'='575995635') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: drop table if exists store_returns +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists store_returns +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_returns +POSTHOOK: query: create external table store_returns +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt decimal(7,2), + sr_return_tax decimal(7,2), + sr_return_amt_inc_tax decimal(7,2), + sr_fee decimal(7,2), + sr_return_ship_cost decimal(7,2), + sr_refunded_cash decimal(7,2), + sr_reversed_charge decimal(7,2), + sr_store_credit decimal(7,2), + sr_net_loss decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_returns +PREHOOK: query: alter table store_returns update statistics set ('numRows'='57591150') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@store_returns +PREHOOK: Output: default@store_returns +POSTHOOK: query: alter table store_returns update statistics set ('numRows'='57591150') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: default@store_returns +PREHOOK: query: drop table if exists reason +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists reason +POSTHOOK: type: DROPTABLE +PREHOOK: query: create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@reason +POSTHOOK: query: create external table reason +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +row format delimited fields terminated by '\t' +STORED AS ORC tblproperties ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@reason +PREHOOK: query: alter table reason update statistics set ('numRows'='72') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@reason +PREHOOK: Output: default@reason +POSTHOOK: query: alter table reason update statistics set ('numRows'='72') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@reason +POSTHOOK: Output: default@reason +PREHOOK: query: alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table store_returns add constraint tpcds_pk_sr primary key (sr_item_sk, sr_ticket_number) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: sr_reason_sk is not null (type: boolean) + Statistics: Num rows: 57591150 Data size: 875385504 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: sr_reason_sk is not null (type: boolean) + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 54711593 Data size: 831616236 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: reason + filterExpr: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean) + Statistics: Num rows: 72 Data size: 13160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: r_reason_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 365 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 70041069312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col7, _col9, _col10 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: int), CASE WHEN (_col3 is not null) THEN ((CAST( (_col9 - _col3) AS decimal(10,0)) * _col10)) ELSE ((CAST( _col9 AS decimal(10,0)) * _col10)) END (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(28,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: decimal(28,2)), _col0 (type: int) + sort order: ++ + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: decimal(28,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 12100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 12100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/perf/tez/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/query93.q.out index d3ab839b42..cb0e5453c5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query93.q.out @@ -48,76 +48,84 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_82] - Limit [LIM_81] (rows=100 width=88) + File Output Operator [FS_89] + Limit [LIM_88] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_80] (rows=316797606 width=88) + Select Operator [SEL_87] (rows=316797606 width=88) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] - Group By Operator [GBY_78] (rows=316797606 width=88) + SHUFFLE [RS_86] + Group By Operator [GBY_85] (rows=316797606 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + SHUFFLE [RS_21] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=633595212 width=88) + Group By Operator [GBY_20] (rows=633595212 width=88) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_15] (rows=633595212 width=88) + Select Operator [SEL_18] (rows=633595212 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_64] (rows=633595212 width=88) - Conds:RS_12._col0, _col2=RS_77._col0, _col2(Inner),Output:["_col3","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_67] (rows=633595212 width=88) + Conds:RS_15._col0, _col2=RS_84._col0, _col2(Inner),Output:["_col3","_col4","_col7","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_12] + PARTITION_ONLY_SHUFFLE [RS_15] PartitionCols:_col0, _col2 - Merge Join Operator [MERGEJOIN_63] (rows=63350266 width=77) - Conds:RS_67._col1=RS_70._col0(Inner),Output:["_col0","_col2","_col3"] + Merge Join Operator [MERGEJOIN_66] (rows=63350266 width=77) + Conds:RS_71._col1=RS_75._col0(Inner),Output:["_col0","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_67] + SHUFFLE [RS_71] PartitionCols:_col1 - Select Operator [SEL_66] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_65] (rows=57591150 width=77) - predicate:(sr_item_sk is not null and sr_reason_sk is not null and sr_ticket_number is not null) - TableScan [TS_0] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] + Select Operator [SEL_70] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_69] (rows=57591150 width=77) + predicate:(_col0 is not null and _col1 is not null and _col2 is not null) + Select Operator [SEL_68] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + SHUFFLE [RS_75] PartitionCols:_col0 - Select Operator [SEL_69] (rows=36 width=200) + Select Operator [SEL_74] (rows=36 width=200) Output:["_col0"] - Filter Operator [FIL_68] (rows=36 width=200) - predicate:((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) - TableScan [TS_3] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + Filter Operator [FIL_73] (rows=36 width=200) + predicate:((_col1 = 'Did not like the warranty') and _col0 is not null) + Select Operator [SEL_72] (rows=72 width=200) + Output:["_col0","_col1"] + TableScan [TS_4] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_77] + SHUFFLE [RS_84] PartitionCols:_col0, _col2 - Select Operator [SEL_76] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_75] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_12_store_returns_sr_item_sk_min) AND DynamicValue(RS_12_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_12_store_returns_sr_item_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_12_store_returns_sr_ticket_number_min) AND DynamicValue(RS_12_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_12_store_returns_sr_ticket_number_bloom_filter))) and ss_item_sk is not null and ss_ticket_number is not null) - TableScan [TS_6] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_72] - Group By Operator [GBY_71] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] - Group By Operator [GBY_54] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_53] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_63] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_74] - Group By Operator [GBY_73] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_60] - Group By Operator [GBY_59] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_58] (rows=63350266 width=77) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_63] + Select Operator [SEL_83] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_82] (rows=575995635 width=88) + predicate:(_col0 is not null and _col2 is not null) + Select Operator [SEL_81] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_80] (rows=575995635 width=88) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_15_store_returns_sr_item_sk_min) AND DynamicValue(RS_15_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_15_store_returns_sr_item_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_15_store_returns_sr_ticket_number_min) AND DynamicValue(RS_15_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_15_store_returns_sr_ticket_number_bloom_filter)))) + TableScan [TS_8] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_77] + Group By Operator [GBY_76] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_58] + Group By Operator [GBY_57] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_56] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_66] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_79] + Group By Operator [GBY_78] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_63] + Group By Operator [GBY_62] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] + Select Operator [SEL_61] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_66]