diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index 1f533bc540..e97e44796f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -308,7 +308,22 @@ private boolean createDerivatives(final List resultExprs, final Op CommonJoinOperator joinOp = (CommonJoinOperator) currentOp; // 2. Backtrack expression to join output - final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp); + ExprNodeDesc expr = currentNode; + if (currentOp != op) { + if (expr instanceof ExprNodeColumnDesc) { + // Expression refers to the output of the current operator, but the backtrack method works + // from the input columns, hence we need to resolve it against the current operator + // here. If the operator was already the join, there is nothing to do. + if (op.getColumnExprMap() != null) { + expr = op.getColumnExprMap().get(((ExprNodeColumnDesc) expr).getColumn()); + } + } else { + // TODO: We can extend to other expression types + // We are done + return true; + } + } + final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(expr, op, joinOp); if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) { // TODO: We can extend to other expression types // We are done diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q index 02bcbafb7e..8994de7a23 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query24.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and 
s_market_id=7 group by c_last_name diff --git a/ql/src/test/queries/clientpositive/perf/query24.q b/ql/src/test/queries/clientpositive/perf/query24.q index 007d7ee415..b3cdaef4a5 100644 --- a/ql/src/test/queries/clientpositive/perf/query24.q +++ b/ql/src/test/queries/clientpositive/perf/query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 4e2e8e7cf6..91fe702cde 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[104][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[107][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -138,8 +140,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col4 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -147,43 +149,43 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899) - Reducer 17 <- Reducer 16 (GROUP, 640) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 887), Map 20 (PARTITION-LEVEL SORT, 887) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 989), Reducer 13 (PARTITION-LEVEL SORT, 989) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 442), Reducer 14 (PARTITION-LEVEL SORT, 442) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 516), Reducer 15 (PARTITION-LEVEL SORT, 516) + Reducer 17 <- Reducer 16 (GROUP, 529) Reducer 18 <- Reducer 17 (GROUP, 1) #### A masked 
pattern was here #### Vertices: Map 12 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 + 0 _col2 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7 input vertices: 1 Map 19 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -191,23 +193,43 @@ STAGE PLANS: Map Operator Tree: TableScan alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - 
key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Map 21 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized + Map 22 Map Operator Tree: TableScan alias: item @@ -227,7 +249,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 22 + Map 23 Map Operator Tree: TableScan alias: store_returns @@ -246,109 +268,92 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), upper(_col2) (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col13 <> upper(_col3)) (type: boolean) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col9 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col4 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col9 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: 
string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + key expressions: _col14 (type: int) + sort order: + + Map-reduce partition columns: _col14 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col17 (type: int), _col18 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + 0 _col14 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: string), _col13 (type: string) + key expressions: _col14 (type: int), _col17 (type: int) sort order: ++ - Map-reduce partition columns: _col9 (type: string), _col13 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + Map-reduce partition columns: _col14 (type: int), _col17 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), 
_col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col18 (type: decimal(7,2)), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: string), _col13 (type: string) - 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + 0 _col14 (type: int), _col17 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col18, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) - keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string) + aggregations: sum(_col18) + keys: _col11 (type: string), _col12 (type: string), _col1 (type: string), _col5 (type: string), _col7 (type: string), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 
(type: int), _col9 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col10 (type: decimal(17,2)) outputColumnNames: _col10 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), count(_col10) mode: hash @@ -381,7 +386,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -396,8 +401,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col4 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -405,11 +410,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) - Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 6 (PARTITION-LEVEL SORT, 400) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 516), Reducer 3 (PARTITION-LEVEL SORT, 516) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 529) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 887), Map 7 (PARTITION-LEVEL SORT, 887) #### A masked pattern was here #### Vertices: Map 1 @@ -433,6 +438,26 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Map 10 + Map Operator Tree: + TableScan + alias: customer + filterExpr: 
(c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: store_returns @@ -451,27 +476,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 
40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), upper(_col2) (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: item @@ -491,29 +496,39 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - 
key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7 + input vertices: + 1 Map 9 + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) Execution mode: vectorized - Reducer 2 Local Work: Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -523,38 +538,28 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: 
int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col9 (type: int) + outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col6 (type: 
decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col13 (type: string), _col14 (type: string), _col17 (type: string), _col21 (type: string), _col23 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -563,61 +568,45 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col3 (type: int) 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col15 (type: string), _col19 (type: string) - sort order: ++ - Map-reduce partition columns: _col15 (type: string), _col19 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col15 (type: string), _col19 (type: string) - 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string) + keys: _col13 (type: string), _col14 (type: string), _col21 (type: string), 
_col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col17 (type: string), _col23 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) sort order: +++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col9 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Execution mode: vectorized Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: decimal(7,2)), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col9 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2)) - outputColumnNames: _col1, _col2, _col7, _col9 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col5, _col7, _col9 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col9) - keys: _col1 (type: string), _col2 (type: string), _col7 (type: string) + keys: _col4 (type: string), _col5 (type: string), _col7 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -627,21 +616,44 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 1 Reducer 18 - Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 61565902849 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col3 > _col4) (type: boolean) - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 
20521967402 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col13 <> upper(_col3)) (type: boolean) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: int), _col11 (type: string), _col12 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col2, _col3, _col6, _col9, _col10, _col12 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col9 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col9 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), 
_col3 (type: string), _col6 (type: string), _col10 (type: string), _col12 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out index 53220d2b0e..1d005b86e1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -115,57 +117,58 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)]) - HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{0, 6, 7, 15, 16, 18, 19, 21, 23}], agg#0=[sum($13)]) - HiveJoin(condition=[AND(=($8, UPPER($2)), =($24, 
$1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + 
HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)]) + HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{8, 9, 11, 12, 15, 16, 19, 23, 25}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[AND(=($1, $11), =($2, $20))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], ca_country=[$8], s_store_sk=[$9], s_store_name=[$10], s_market_id=[$11], s_state=[$12], s_zip=[$13]) + HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS 
NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(_o__c0=[*(0.05, /($0, $1))]) HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10]) - HiveAggregate(group=[{3, 4, 12, 14, 17, 18, 19, 20, 21, 22}], agg#0=[sum($10)]) - HiveJoin(condition=[AND(=($5, UPPER($24)), =($15, $23))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) + 
HiveAggregate(group=[{7, 8, 11, 15, 17, 20, 21, 22, 23, 24}], agg#0=[sum($4)]) + HiveJoin(condition=[AND(=($3, $26), =($0, $25))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $19)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $5), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - 
HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 34cc51b62c..0801f34472 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and 
c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -115,54 +117,55 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)]) - HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{0, 6, 7, 15, 16, 17, 18, 20, 21}], agg#0=[sum($13)]) - HiveJoin(condition=[AND(=($8, $2), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($14)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, 
store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(_o__c0=[*(0.05, /($0, $1))]) - HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10]) - HiveAggregate(group=[{3, 4, 12, 13, 16, 17, 18, 19, 20, 21}], agg#0=[sum($10)]) - HiveJoin(condition=[AND(=($5, $23), =($14, $22))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($14)]) + HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)]) + HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{8, 9, 10, 11, 14, 15, 18, 22, 23}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + 
HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) 
HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(_o__c0=[*(0.05, /($0, $1))]) + HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) + HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($0, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($17, $12), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + 
HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out index 7fbbc0518e..fb77386d6e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ 
-114,230 +116,248 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Map 24 <- Reducer 20 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 25 <- Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 24 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 23 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 13 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 21 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) 
+Reducer 4 <- Map 24 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_88] - Select Operator [SEL_87] (rows=78393744 width=380) + Reducer 6 + File Output Operator [FS_91] + Select Operator [SEL_90] (rows=1313165 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_86] (rows=78393744 width=492) + Filter Operator [FIL_89] (rows=1313165 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_287] (rows=235181232 width=492) + Merge Join Operator [MERGEJOIN_298] (rows=3939496 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Select Operator [SEL_339] (rows=1 width=112) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + Select Operator [SEL_349] (rows=1 width=112) Output:["_col0"] - Group By Operator [GBY_338] (rows=1 width=120) + Group By Operator [GBY_348] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - Group By Operator [GBY_336] (rows=1 width=120) + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_346] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_335] (rows=2121289008973 width=932) + Select Operator [SEL_345] (rows=8029453 width=932) Output:["_col10"] - Group By Operator [GBY_334] (rows=2121289008973 width=932) + Group By Operator [GBY_344] (rows=8029453 width=932) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_75] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_74] (rows=2121289008973 width=932) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col10, _col11, _col6, _col7, _col14, _col15, _col16, _col17, _col18, _col21 - Merge Join Operator [MERGEJOIN_286] (rows=2121289008973 width=932) - Conds:RS_70._col12, _col8=RS_323._col2, _col1(Inner),Output:["_col4","_col6","_col7","_col10","_col11","_col14","_col15","_col16","_col17","_col18","_col21"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col2, _col1 - Select Operator [SEL_321] (rows=40000000 width=359) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_320] (rows=40000000 width=272) - predicate:(ca_zip is not null and upper(ca_country) is not null) - TableScan [TS_14] (rows=40000000 width=272) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_state","ca_zip","ca_country"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col12, _col8 - Merge Join Operator [MERGEJOIN_285] (rows=537799796 width=1023) - Conds:RS_67._col0, _col3=RS_319._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col8","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col18"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] - PartitionCols:_col0, _col1 - Select Operator [SEL_317] (rows=57591150 width=8) - Output:["_col0","_col1"] - TableScan [TS_12] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - 
<-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_284] (rows=385681992 width=1029) - Conds:RS_64._col0=RS_291._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col18"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_291] - PartitionCols:_col0 - Select Operator [SEL_289] (rows=462000 width=384) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_3] (rows=462000 width=384) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_283] (rows=385681992 width=648) - Conds:RS_61._col1=RS_316._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col10","_col11","_col12"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] - PartitionCols:_col0 - Select Operator [SEL_314] (rows=80000000 width=276) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_313] (rows=80000000 width=276) - predicate:c_birth_country is not null - TableScan [TS_9] (rows=80000000 width=276) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_282] (rows=385681992 width=379) - Conds:RS_333._col2=RS_302._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - PartitionCols:_col0 - Select Operator [SEL_299] (rows=155 width=267) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_298] (rows=155 width=271) - predicate:((s_market_id = 7) and s_zip is not null) - TableScan [TS_6] (rows=1704 width=270) - 
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] - PartitionCols:_col2 - Select Operator [SEL_332] (rows=525333486 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_331] (rows=525333486 width=122) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_59_store_s_store_sk_min) AND DynamicValue(RS_59_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_59_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) - TableScan [TS_42] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_330] - Group By Operator [GBY_329] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_303] (rows=155 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_299] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_328] - Select Operator [SEL_327] (rows=235181232 width=380) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_326] (rows=235181232 width=380) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col2, _col7 - Select Operator [SEL_325] (rows=365777643230 width=843) - Output:["_col1","_col2","_col7","_col9"] - Group By Operator [GBY_324] (rows=365777643230 width=843) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_33] (rows=365777643230 width=843) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col15, _col16, _col11, _col20, _col6, _col7, _col8, _col9, _col12 - Merge Join Operator [MERGEJOIN_281] (rows=365777643230 width=843) - Conds:RS_29._col13, _col17=RS_322._col1, _col2(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col11","_col12","_col15","_col16","_col20"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - PartitionCols:_col1, _col2 - Please refer to the previous Select Operator [SEL_321] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col13, _col17 - Merge Join Operator [MERGEJOIN_280] (rows=92733777 width=910) - Conds:RS_26._col0, _col3=RS_318._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + Group By Operator [GBY_77] (rows=8029453 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col13)"],keys:_col2, _col3, _col6, _col15, _col16, _col19, _col20, _col21, _col22, _col23 + Merge Join Operator [MERGEJOIN_297] (rows=13238221 width=865) + Conds:RS_73._col9, _col12=RS_333._col0, _col1(Inner),Output:["_col2","_col3","_col6","_col13","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0, _col3 - Merge Join Operator 
[MERGEJOIN_279] (rows=56246341 width=899) - Conds:RS_23._col1=RS_315._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Select Operator [SEL_331] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_23] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col9, _col12 + Merge Join Operator [MERGEJOIN_296] (rows=8029453 width=828) + Conds:RS_70._col9=RS_302._col0(Inner),Output:["_col2","_col3","_col6","_col9","_col12","_col13","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_314] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=56246341 width=630) - Conds:RS_20._col2=RS_300._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8","_col9","_col11","_col12","_col13"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_299] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_277] (rows=76612563 width=382) - Conds:RS_312._col0=RS_292._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] - PartitionCols:_col0 - Select Operator [SEL_290] (rows=7000 width=295) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_288] (rows=7000 width=384) - predicate:(i_color = 'orchid') - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - PartitionCols:_col0 - Select Operator [SEL_311] 
(rows=525333486 width=122) + Select Operator [SEL_300] (rows=462000 width=384) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_3] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_295] (rows=8029453 width=448) + Conds:RS_67._col7, _col11=RS_316._col3, _col0(Inner),Output:["_col2","_col3","_col6","_col9","_col12","_col13","_col15","_col16"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_316] + PartitionCols:_col3, _col0 + Select Operator [SEL_314] (rows=155 width=267) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_313] (rows=155 width=271) + predicate:((s_market_id = 7) and s_zip is not null) + TableScan [TS_9] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col7, _col11 + Merge Join Operator [MERGEJOIN_294] (rows=525333486 width=473) + Conds:RS_64._col0=RS_343._col1(Inner),Output:["_col2","_col3","_col6","_col7","_col9","_col11","_col12","_col13"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] + PartitionCols:_col1 + Select Operator [SEL_342] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_310] (rows=525333486 width=122) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_18_item_i_item_sk_min) AND DynamicValue(RS_18_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_21_store_s_store_sk_min) AND DynamicValue(RS_21_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_21_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) - TableScan [TS_0] 
(rows=575995635 width=122) + Filter Operator [FIL_341] (rows=525333486 width=122) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_68_store_s_store_sk_min) AND DynamicValue(RS_68_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_68_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_50] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_297] - Group By Operator [GBY_296] (rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_340] + Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=7000 width=4) + Select Operator [SEL_317] (rows=155 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_309] - Group By Operator [GBY_308] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Group By Operator [GBY_304] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_301] (rows=155 width=4) - Output:["_col0"] - Please refer to 
the previous Select Operator [SEL_299] + Please refer to the previous Select Operator [SEL_314] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col0 + Filter Operator [FIL_63] (rows=80000000 width=635) + predicate:(_col4 <> _col8) + Merge Join Operator [MERGEJOIN_293] (rows=80000000 width=635) + Conds:RS_323._col1=RS_312._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=40000000 width=363) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_309] (rows=40000000 width=276) + predicate:ca_zip is not null + TableScan [TS_6] (rows=40000000 width=276) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col1 + Select Operator [SEL_321] (rows=80000000 width=280) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_320] (rows=80000000 width=280) + predicate:c_current_addr_sk is not null + TableScan [TS_12] (rows=80000000 width=280) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Select Operator [SEL_337] (rows=3939496 width=380) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_336] (rows=3939496 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col4, _col5, _col7 + Select Operator [SEL_335] (rows=84010488 width=843) + Output:["_col4","_col5","_col7","_col9"] + Group By Operator [GBY_334] (rows=84010488 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, 
KEY._col6, KEY._col7, KEY._col8 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_35] (rows=84010488 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col12, _col13, _col20, _col6, _col7, _col8, _col9, _col16, _col21 + Merge Join Operator [MERGEJOIN_292] (rows=138508741 width=824) + Conds:RS_31._col0, _col3=RS_332._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_291] (rows=84010488 width=820) + Conds:RS_28._col1, _col2=RS_29._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0, _col9 + Select Operator [SEL_22] (rows=7276996 width=724) + Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] + Filter Operator [FIL_21] (rows=7276996 width=724) + predicate:(_col12 <> _col3) + Merge Join Operator [MERGEJOIN_290] (rows=7276996 width=724) + Conds:RS_18._col0=RS_322._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col10","_col11","_col12"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_289] (rows=611379 width=452) + Conds:RS_311._col2=RS_315._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + PartitionCols:_col3 + Please refer to the previous Select Operator [SEL_314] + 
<-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] + PartitionCols:_col2 + Please refer to the previous Select Operator [SEL_310] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_288] (rows=76612563 width=382) + Conds:RS_330._col0=RS_303._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] + PartitionCols:_col0 + Select Operator [SEL_301] (rows=7000 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_299] (rows=7000 width=384) + predicate:(i_color = 'orchid') + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] + PartitionCols:_col0 + Select Operator [SEL_329] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_328] (rows=525333486 width=122) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_customer_c_customer_sk_min) AND DynamicValue(RS_29_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_item_i_item_sk_min) AND DynamicValue(RS_26_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_29_store_s_store_sk_min) AND DynamicValue(RS_29_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_29_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] + Select Operator [SEL_147] (rows=7276996 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_152] (rows=7276996 width=8) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_304] (rows=7000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_301] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 902358a524..43ece85275 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out 
@@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -114,234 +116,242 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Map 24 <- Reducer 20 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Map 23 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 12 
(CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_91] - Select Operator [SEL_90] (rows=78393744 width=380) + Reducer 6 + File Output Operator [FS_94] + Select Operator [SEL_93] (rows=1313165 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=78393744 width=492) + Filter Operator [FIL_92] (rows=1313165 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_290] (rows=235181232 width=492) + Merge Join Operator [MERGEJOIN_301] (rows=3939496 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] - Select Operator [SEL_344] (rows=1 width=112) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=1 width=112) Output:["_col0"] - Group By Operator [GBY_343] (rows=1 width=120) + Group By Operator [GBY_349] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Group By Operator [GBY_341] (rows=1 width=120) + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_347] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_340] (rows=2121289008973 width=932) + Select Operator [SEL_346] (rows=576061174 width=932) Output:["_col10"] - Group By Operator [GBY_339] (rows=2121289008973 width=932) + Group By Operator [GBY_345] (rows=576061174 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, 
KEY._col7, KEY._col8, KEY._col9 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_78] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_81] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_77] (rows=2121289008973 width=932) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col11, _col12, _col6, _col8, _col15, _col16, _col17, _col18, _col19, _col22 - Merge Join Operator [MERGEJOIN_289] (rows=2121289008973 width=932) - Conds:RS_73._col9, _col13=RS_328._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] + Group By Operator [GBY_80] (rows=576061174 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col18)"],keys:_col11, _col12, _col1, _col5, _col7, _col20, _col21, _col22, _col23, _col24 + Merge Join Operator [MERGEJOIN_300] (rows=589731269 width=928) + Conds:RS_76._col14, _col17=RS_332._col0, _col1(Inner),Output:["_col1","_col5","_col7","_col11","_col12","_col18","_col20","_col21","_col22","_col23","_col24"] <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] - PartitionCols:_col1, upper(_col2) - Select Operator [SEL_326] (rows=40000000 width=272) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_325] (rows=40000000 width=272) - predicate:(ca_zip is not null and upper(ca_country) is not null) - TableScan [TS_15] (rows=40000000 width=272) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_state","ca_zip","ca_country"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col9, _col13 - Merge Join Operator [MERGEJOIN_288] (rows=537799796 width=1023) - Conds:RS_70._col0, _col3=RS_324._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] - <-Map 22 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_324] - PartitionCols:_col0, _col1 - Select Operator [SEL_322] (rows=57591150 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=57591150 width=8) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_287] (rows=385681992 width=1029) - Conds:RS_67._col0=RS_297._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - PartitionCols:_col0 - Select Operator [SEL_294] (rows=462000 width=384) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_292] (rows=462000 width=384) - predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=384) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_286] (rows=385681992 width=648) - Conds:RS_64._col1=RS_320._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] - PartitionCols:_col0 - Select Operator [SEL_318] (rows=80000000 width=276) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_317] (rows=80000000 width=276) - predicate:(c_birth_country is not null and c_customer_sk is not null) - TableScan [TS_9] (rows=80000000 width=276) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col1 - Merge Join Operator 
[MERGEJOIN_285] (rows=385681992 width=379) - Conds:RS_338._col2=RS_306._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col0 - Select Operator [SEL_303] (rows=155 width=271) - Output:["_col0","_col1","_col3","_col4"] - Filter Operator [FIL_302] (rows=155 width=271) - predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_6] (rows=1704 width=270) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] - PartitionCols:_col2 - Select Operator [SEL_337] (rows=525333486 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_336] (rows=525333486 width=122) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_43] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_335] - Group By Operator [GBY_334] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - Group By Operator [GBY_309] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_307] (rows=155 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_303] - <-Reducer 7 
[CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - Select Operator [SEL_332] (rows=235181232 width=380) + SHUFFLE [RS_332] + PartitionCols:_col0, _col1 + Select Operator [SEL_330] (rows=57591150 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_329] (rows=57591150 width=8) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_23] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col14, _col17 + Merge Join Operator [MERGEJOIN_299] (rows=576061174 width=936) + Conds:RS_73._col14=RS_308._col0(Inner),Output:["_col1","_col5","_col7","_col11","_col12","_col14","_col17","_col18","_col20","_col21","_col22","_col23","_col24"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_308] + PartitionCols:_col0 + Select Operator [SEL_305] (rows=462000 width=384) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_303] (rows=462000 width=384) + predicate:i_item_sk is not null + TableScan [TS_3] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_298] (rows=576061174 width=555) + Conds:RS_70._col9, _col4=RS_344._col1, _col2(Inner),Output:["_col1","_col5","_col7","_col11","_col12","_col14","_col17","_col18"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col9, _col4 + Filter Operator [FIL_21] (rows=7276996 width=637) + predicate:(_col13 <> upper(_col3)) + Merge Join Operator [MERGEJOIN_293] (rows=7276996 width=637) + Conds:RS_18._col0=RS_321._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col9","_col11","_col12","_col13"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + PartitionCols:_col1 + Select Operator [SEL_320] 
(rows=80000000 width=280) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_319] (rows=80000000 width=280) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_12] (rows=80000000 width=280) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_292] (rows=611379 width=365) + Conds:RS_315._col2=RS_318._col4(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] + PartitionCols:_col2 + Select Operator [SEL_314] (rows=40000000 width=276) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_313] (rows=40000000 width=276) + predicate:(ca_address_sk is not null and ca_zip is not null) + TableScan [TS_6] (rows=40000000 width=276) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] + PartitionCols:_col4 + Select Operator [SEL_317] (rows=155 width=271) + Output:["_col0","_col1","_col3","_col4"] + Filter Operator [FIL_316] (rows=155 width=271) + predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) + TableScan [TS_9] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] + PartitionCols:_col1, _col2 + Select Operator [SEL_343] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_342] (rows=525333486 width=122) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_70_customer_c_customer_sk_min) AND DynamicValue(RS_70_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, 
DynamicValue(RS_70_customer_c_customer_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_70_store_s_store_sk_min) AND DynamicValue(RS_70_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_70_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_54] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_339] + Group By Operator [GBY_338] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] + Select Operator [SEL_248] (rows=7276996 width=8) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_21] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_341] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_255] + Group By Operator [GBY_254] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_253] (rows=7276996 width=8) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_21] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + Select Operator [SEL_336] (rows=3939496 width=380) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_331] (rows=235181232 
width=380) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col2, _col7 - Select Operator [SEL_330] (rows=365777643230 width=843) - Output:["_col1","_col2","_col7","_col9"] - Group By Operator [GBY_329] (rows=365777643230 width=843) + Group By Operator [GBY_335] (rows=3939496 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col4, _col5, _col7 + Select Operator [SEL_334] (rows=84010488 width=843) + Output:["_col4","_col5","_col7","_col9"] + Group By Operator [GBY_333] (rows=84010488 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_35] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_37] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_34] (rows=365777643230 width=843) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col17, _col18, _col12, _col22, _col6, _col7, _col9, _col10, _col14 - Merge Join Operator [MERGEJOIN_284] (rows=365777643230 width=843) - Conds:RS_30._col15, _col19=RS_327._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] + Group By Operator [GBY_36] (rows=84010488 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col13, _col14, _col21, _col6, _col7, _col9, _col10, _col17, _col23 + Merge Join Operator [MERGEJOIN_295] (rows=138508741 width=824) + Conds:RS_32._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col13","_col14","_col17","_col21","_col23"] <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] - PartitionCols:_col1, upper(_col2) - Please refer to the previous Select Operator 
[SEL_326] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col15, _col19 - Merge Join Operator [MERGEJOIN_283] (rows=92733777 width=910) - Conds:RS_27._col0, _col3=RS_323._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_322] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_282] (rows=56246341 width=899) - Conds:RS_24._col1=RS_319._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_331] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_330] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_294] (rows=84010488 width=820) + Conds:RS_29._col1, _col2=RS_30._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col13","_col14","_col17","_col21","_col23"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0, _col9 + Select Operator [SEL_22] (rows=7276996 width=637) + Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col12"] + Please refer to the previous Filter Operator [FIL_21] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_291] (rows=76612563 width=382) + Conds:RS_328._col0=RS_306._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + PartitionCols:_col0 + Select Operator [SEL_304] (rows=7000 width=385) + Output:["_col0","_col1","_col2","_col4","_col5"] + Filter Operator [FIL_302] (rows=7000 width=384) + predicate:((i_color = 'orchid') and 
i_item_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_281] (rows=56246341 width=630) - Conds:RS_21._col2=RS_304._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_303] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_280] (rows=76612563 width=382) - Conds:RS_316._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] - PartitionCols:_col0 - Select Operator [SEL_293] (rows=7000 width=385) - Output:["_col0","_col1","_col2","_col4","_col5"] - Filter Operator [FIL_291] (rows=7000 width=384) - predicate:((i_color = 'orchid') and i_item_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] - PartitionCols:_col0 - Select Operator [SEL_315] (rows=525333486 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_314] (rows=525333486 width=122) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=122) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_301] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] - Group By Operator [GBY_298] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=7000 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_293] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_313] - Group By Operator [GBY_312] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - Group By Operator [GBY_308] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_305] (rows=155 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_303] + Select Operator [SEL_327] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_326] (rows=525333486 width=122) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_30_customer_c_customer_sk_min) AND DynamicValue(RS_30_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_30_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN 
DynamicValue(RS_30_store_s_store_sk_min) AND DynamicValue(RS_30_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_30_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_323] + Group By Operator [GBY_322] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] + Select Operator [SEL_150] (rows=7276996 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_155] (rows=7276996 width=8) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_22] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE 
[RS_310] + Group By Operator [GBY_309] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_307] (rows=7000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_304]