diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 5d90c87a67..33205a594f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; -import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -32,8 +31,6 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; @@ -51,13 +48,7 @@ protected static final Logger LOG = LoggerFactory.getLogger(HivePreFilteringRule.class); - private static final Set COMPARISON = EnumSet.of(SqlKind.EQUALS, - SqlKind.GREATER_THAN_OR_EQUAL, - SqlKind.LESS_THAN_OR_EQUAL, - SqlKind.GREATER_THAN, SqlKind.LESS_THAN, - SqlKind.NOT_EQUALS); - - private final FilterFactory filterFactory; + private final FilterFactory filterFactory; // Max number of nodes when converting to CNF private final int maxCNFNodeCount; @@ -120,7 +111,7 @@ public void onMatch(RelOptRuleCall call) { for (RexNode operand : operands) { if (operand.getKind() == SqlKind.OR) { - extractedCommonOperands = extractCommonOperands(rexBuilder, operand, maxCNFNodeCount); + extractedCommonOperands = extractCommonOperands(rexBuilder, filter.getInput(), operand, maxCNFNodeCount); for (RexNode extractedExpr : extractedCommonOperands) { if (operandsToPushDownDigest.add(extractedExpr.toString())) { operandsToPushDown.add(extractedExpr); @@ -155,7 +146,7 @@ public void onMatch(RelOptRuleCall call) { break; case OR: - operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount); + operandsToPushDown = extractCommonOperands(rexBuilder, filter.getInput(), topFilterCondition, maxCNFNodeCount); break; default: return; @@ -191,8 +182,8 @@ public void onMatch(RelOptRuleCall call) { } - private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition, - int maxCNFNodeCount) { + private static List extractCommonOperands(RexBuilder rexBuilder, RelNode input, + RexNode condition, int maxCNFNodeCount) { assert condition.getKind() == SqlKind.OR; Multimap reductionCondition = LinkedHashMultimap.create(); @@ -216,27 +207,12 @@ public void onMatch(RelOptRuleCall call) { return new ArrayList<>(); } RexCall conjCall = (RexCall) conjunction; - RexNode ref = null; - if (COMPARISON.contains(conjCall.getOperator().getKind())) { - if (conjCall.operands.get(0) instanceof RexInputRef - && conjCall.operands.get(1) instanceof RexLiteral) { - ref = conjCall.operands.get(0); - } else if (conjCall.operands.get(1) instanceof RexInputRef - && conjCall.operands.get(0) instanceof RexLiteral) { - ref = conjCall.operands.get(1); - } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); - } - } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { - ref = conjCall.operands.get(0); - } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { - ref = conjCall.operands.get(1); - } else { + Set refs = HiveCalciteUtil.getInputRefs(conjCall); + if (refs.size() != 1) { // We do not know what it is, we bail out for safety return new ArrayList<>(); } - + RexNode ref = rexBuilder.makeInputRef(input, refs.iterator().next()); String stringRef = ref.toString(); reductionCondition.put(stringRef, conjCall); refsInCurrentOperand.add(stringRef); diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 086c291b37..279cb7a152 100644 --- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -151,11 +151,11 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: f - filterExpr: key is not null (type: boolean) + alias: m + filterExpr: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -168,11 +168,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan - alias: m - filterExpr: ((value <> '') and key is not null) (type: boolean) + alias: f + filterExpr: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value <> '') and key is not null) (type: boolean) + predicate: ((value) IN ('2008-04-08', '2008-04-10', '2008-04-09') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -191,10 +191,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col1 = '2008-04-08') or (_col1 = '2008-04-10')) and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + predicate: ((((_col3 = '2008-04-08') or (_col3 = '2008-04-10')) and (_col1 = '2008-04-08')) or (_col3 = '2008-04-09')) (type: boolean) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -208,11 +208,11 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col3 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col2 (type: string) TableScan alias: g filterExpr: (value <> '') (type: boolean) @@ -234,12 +234,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col4 + outputColumnNames: _col2, _col4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -443,10 +443,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: m - filterExpr: ((value <> '') and key is not null) (type: boolean) + filterExpr: ((value) IN ('2008-04-10', '2008-04-08') and (value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value <> '') and key is not null) (type: boolean) + predicate: ((value <> '') and (value) IN ('2008-04-10', '2008-04-08') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index 4111fa5b71..786f587486 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -123,22 +123,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 8 Map Operator Tree: TableScan - alias: store - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + expressions: hd_demo_sk (type: int), hd_dep_count (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -148,23 +148,23 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int), hd_dep_count (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -172,33 +172,65 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 152), Reducer 2 (PARTITION-LEVEL SORT, 152) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 218), Reducer 3 (PARTITION-LEVEL SORT, 218) + Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 28), Reducer 4 (PARTITION-LEVEL SORT, 28) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer_demographics + filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 6 + Map 2 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10 + input vertices: + 0 Map 1 + Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -217,34 +249,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: customer_demographics - filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized Map 9 Map Operator Tree: TableScan alias: customer_address - filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) @@ -257,22 +269,6 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) Reducer 3 Local Work: Map Reduce Local Work @@ -283,54 +279,62 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14, _col16 + outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col14 input vertices: 1 Map 8 - Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) (type: boolean) - Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int) Reducer 4 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col7, _col8, _col9, _col18 - Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col14, _col16 + Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) (type: boolean) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7, _col8 - input vertices: - 1 Map 10 - Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col16 = 'KY') or (_col16 = 'GA') or (_col16 = 'NM')) and _col10 BETWEEN 100 AND 200) or (((_col16 = 'MT') or (_col16 = 'OR') or (_col16 = 'IN')) and _col10 BETWEEN 150 AND 300) or (((_col16 = 'WI') or (_col16 = 'MO') or (_col16 = 'WV')) and _col10 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 31233897 Data size: 2755463519 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 31233897 Data size: 2755463519 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col14 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col6, _col7, _col8, _col9, _col14, _col19, _col20 + Statistics: Num rows: 34357287 Data size: 3031009936 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) (type: boolean) + Statistics: Num rows: 1431552 Data size: 126291937 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + outputColumnNames: _col6, _col8, _col9 + Statistics: Num rows: 1431552 Data size: 126291937 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col8), count(_col8) + aggregations: sum(_col6), count(_col6), sum(_col8), count(_col8), sum(_col9), count(_col9) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE @@ -338,7 +342,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index 44665fbf3c..f6a2e1b989 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -255,10 +255,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -315,10 +315,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -355,10 +355,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -431,14 +431,14 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) + keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 14 @@ -446,34 +446,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int) sort order: +++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col3 (type: int), _col6 (type: decimal(17,2)) Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST - partition by: _col3, _col2, _col4, _col5, _col0 + order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col1, _col0, _col4, _col5, _col2 raw input shape: window functions: window function definition @@ -484,55 +484,55 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: string, _col7: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: int, _col4: int, _col5: string, _col6: string, _col7: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS LAST, _col2 ASC NULLS LAST - partition by: _col4, _col3, _col5, _col6 + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col5, _col6 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col1, _col2 + arguments: _col3, _col4 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col5 (type: string), _col6 (type: string), _col1 (type: int), _col2 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: int), _col4 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -597,14 +597,14 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) + keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 23 @@ -612,39 +612,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col3, _col2, _col4, _col5 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -654,7 +654,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -687,14 +687,14 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) + keys: _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 @@ -702,39 +702,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: int), _col3 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col3, _col2, _col4, _col5 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -744,7 +744,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out index ed8d936994..4fcc8b7cfb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -165,8 +165,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -174,33 +174,45 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 218), Reducer 4 (PARTITION-LEVEL SORT, 218) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + predicate: ((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6 - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col7 + input vertices: + 0 Map 1 + Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Local Work: + Map Reduce Local Work + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -219,7 +231,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -238,14 +250,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address - filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) @@ -258,65 +270,57 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 2 + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6 - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col5, _col7 + Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) - Reducer 3 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6 - Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col3, _col5, _col7 + Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) - Reducer 4 - Local Work: - Map Reduce Local Work + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: decimal(7,2)) + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col6, _col13 - Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col5, _col7, _col14 + Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) (type: boolean) - Statistics: Num rows: 85183359 Data size: 7514900682 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4 - input vertices: - 1 Map 9 - Statistics: Num rows: 93701696 Data size: 8266390929 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col14 = 'KY') or (_col14 = 'GA') or (_col14 = 'NM')) and _col7 BETWEEN 0 AND 2000) or (((_col14 = 'MT') or (_col14 = 'OR') or (_col14 = 'IN')) and _col7 BETWEEN 150 AND 3000) or (((_col14 = 'WI') or (_col14 = 'MO') or (_col14 = 'WV')) and _col7 BETWEEN 50 AND 25000)) (type: boolean) + Statistics: Num rows: 31233897 Data size: 2755463519 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int) + outputColumnNames: _col5 + Statistics: Num rows: 31233897 Data size: 2755463519 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) + aggregations: sum(_col5) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -324,7 +328,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out index 34593b763a..4e79143138 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out @@ -126,10 +126,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: item - filterExpr: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + filterExpr: ((i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and ((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + predicate: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_manufact_id (type: int) diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 7d539224e7..5976141c5f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -269,10 +269,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -329,10 +329,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -369,10 +369,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -425,14 +425,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 14 @@ -440,34 +440,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) sort order: ++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(17,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(17,2)) Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST - partition by: _col4, _col3, _col2, _col0 + order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col1, _col0, _col4, _col2 raw input shape: window functions: window function definition @@ -478,55 +478,55 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: int, _col4: int, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS LAST, _col2 ASC NULLS LAST - partition by: _col5, _col4, _col3 + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col5 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col1, _col2 + arguments: _col3, _col4 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col4 (type: string), _col3 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: string), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -603,14 +603,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 23 @@ -618,39 +618,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col4, _col3, _col2 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -660,7 +660,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -681,14 +681,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 4 @@ -696,39 +696,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col4, _col3, _col2 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -738,7 +738,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out index dc51332e12..88a35a9257 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -128,10 +128,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: item - filterExpr: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + filterExpr: ((i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and ((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + predicate: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_manager_id (type: int) diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 139a32be44..537377a926 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,7 +182,8 @@ POSTHOOK: Input: default@web_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -190,26 +191,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 - Map Operator Tree: - TableScan - alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 12 Map Operator Tree: TableScan @@ -225,25 +206,69 @@ STAGE PLANS: Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col14 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 62), Map 9 (PARTITION-LEVEL SORT, 62) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 57), Reducer 2 (PARTITION-LEVEL SORT, 57) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 81), Reducer 3 (PARTITION-LEVEL SORT, 81) - Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 13), Reducer 4 (PARTITION-LEVEL SORT, 13) - Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 167), Reducer 5 (PARTITION-LEVEL SORT, 167) - Reducer 7 <- Reducer 6 (GROUP, 59) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 20), Map 2 (PARTITION-LEVEL SORT, 20) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 31), Reducer 3 (PARTITION-LEVEL SORT, 31) + Reducer 5 <- Map 13 (PARTITION-LEVEL SORT, 184), Reducer 4 (PARTITION-LEVEL SORT, 184) + Reducer 6 <- Map 14 (PARTITION-LEVEL SORT, 15), Reducer 5 (PARTITION-LEVEL SORT, 15) + Reducer 7 <- Map 15 (PARTITION-LEVEL SORT, 7), Reducer 6 (PARTITION-LEVEL SORT, 7) + Reducer 8 <- Reducer 7 (GROUP, 7) + Reducer 9 <- Reducer 8 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: web_returns @@ -263,26 +288,27 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 10 + Map 13 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: cd1 @@ -302,7 +328,7 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: cd2 @@ -321,63 +347,55 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 9 + Map 2 Map Operator Tree: TableScan alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7 + input vertices: + 0 Map 1 + Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14 - Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7 + Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) - Reducer 3 + key expressions: _col2 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -385,101 +403,87 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col10, _col12, _col13, _col14 - Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE + 0 _col2 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col5 (type: int) + outputColumnNames: _col5, _col6, _col7, _col11, _col12, _col13, _col14, _col16, _col17 + Statistics: Num rows: 21296393 Data size: 2895693863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: int) + 0 _col14 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col12, _col13, _col14 + outputColumnNames: _col5, _col6, _col7, _col11, _col12, _col13, _col16, _col17, _col19 input vertices: - 1 Map 11 - Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19 - input vertices: - 1 Map 12 - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 4 + 1 Map 12 + Statistics: Num rows: 23426032 Data size: 3185263318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 23426032 Data size: 3185263318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col12 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19, _col21, _col22 - Statistics: Num rows: 77305913 Data size: 10511369184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col5, _col6, _col7, _col11, _col13, _col16, _col17, _col19, _col21 + Statistics: Num rows: 25768635 Data size: 3503789725 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col21 = 'KY') or (_col21 = 'GA') or (_col21 = 'NM')) and _col7 BETWEEN 100 AND 200) or (((_col21 = 'MT') or (_col21 = 'OR') or (_col21 = 'IN')) and _col7 BETWEEN 150 AND 300) or (((_col21 = 'WI') or (_col21 = 'MO') or (_col21 = 'WV')) and _col7 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 8589543 Data size: 1167929636 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col21 (type: string), _col22 (type: string) - sort order: +++ - Map-reduce partition columns: _col3 (type: int), _col21 (type: string), _col22 (type: string) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 5 + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 8589543 Data size: 1167929636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int), _col21 (type: string), _col22 (type: string) - 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col2, _col6, _col7, _col12, _col14, _col19 - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 6 + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col13, _col16, _col17, _col19, _col24, _col25 + Statistics: Num rows: 9448497 Data size: 1284722627 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col24 = 'D') and (_col25 = 'Primary') and _col6 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col6 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200)) (type: boolean) + Statistics: Num rows: 787374 Data size: 107060116 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int), _col24 (type: string), _col25 (type: string) + sort order: +++ + Map-reduce partition columns: _col13 (type: int), _col24 (type: string), _col25 (type: string) + Statistics: Num rows: 787374 Data size: 107060116 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col12, _col14, _col19, _col27 - Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) (type: boolean) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col19 (type: string) - outputColumnNames: _col6, _col7, _col12, _col19 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col19 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) - Reducer 7 + 0 _col13 (type: int), _col24 (type: string), _col25 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col5, _col16, _col17, _col19 + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), count(_col5), sum(_col17), count(_col17), sum(_col16), count(_col16) + keys: _col19 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -487,29 +491,29 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(_col1) / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query88.q.out b/ql/src/test/results/clientpositive/perf/spark/query88.q.out index fbc5d93886..029da524d0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query88.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query88.q.out @@ -234,19 +234,19 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -254,19 +254,19 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -301,9 +301,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 13 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -311,7 +311,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -355,9 +355,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 18 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -365,7 +365,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -409,9 +409,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 23 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -419,7 +419,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -463,9 +463,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 28 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -473,7 +473,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -517,9 +517,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 33 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -527,7 +527,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -571,9 +571,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 38 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -581,7 +581,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -625,9 +625,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 8 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -635,7 +635,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -810,19 +810,19 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -830,19 +830,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -891,9 +891,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 input vertices: 1 Map 3 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE @@ -901,7 +901,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 input vertices: @@ -985,19 +985,19 @@ STAGE PLANS: Map 13 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1005,19 +1005,19 @@ STAGE PLANS: Map 14 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1050,19 +1050,19 @@ STAGE PLANS: Map 18 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1070,19 +1070,19 @@ STAGE PLANS: Map 19 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1115,19 +1115,19 @@ STAGE PLANS: Map 23 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1135,19 +1135,19 @@ STAGE PLANS: Map 24 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1180,19 +1180,19 @@ STAGE PLANS: Map 28 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1200,19 +1200,19 @@ STAGE PLANS: Map 29 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1245,19 +1245,19 @@ STAGE PLANS: Map 33 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1265,19 +1265,19 @@ STAGE PLANS: Map 34 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1310,19 +1310,19 @@ STAGE PLANS: Map 38 Map Operator Tree: TableScan - alias: time_dim - filterExpr: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and (((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + predicate: ((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 214000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -1330,19 +1330,19 @@ STAGE PLANS: Map 39 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out index 66eb333e31..f91b78cd04 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -96,8 +96,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) Reducer 6 <- Reducer 5 (SORT, 1) @@ -117,51 +117,51 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 7 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: ((i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and ((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and (i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_moy (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized Map 8 Map Operator Tree: TableScan - alias: item - filterExpr: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_moy (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -169,16 +169,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col6 + outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col6 (type: int) + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) Reducer 3 Local Work: Map Reduce Local Work @@ -187,9 +187,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col6, _col8, _col9, _col10 + outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -197,20 +197,20 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col6, _col8, _col9, _col10, _col12, _col13 + outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col12, _col13 input vertices: 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col6 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col12 (type: string), _col13 (type: string) + keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col12 (type: string), _col13 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 @@ -218,34 +218,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++ - Map-reduce partition columns: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col6 (type: decimal(17,2)) + value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col3, _col1, _col4, _col5 + order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col2, _col0, _col4, _col5 raw input shape: window functions: window function definition @@ -256,14 +256,14 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out index ffef171259..7c8827e280 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -115,166 +115,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_167] - Select Operator [SEL_166] (rows=1 width=256) + File Output Operator [FS_162] + Select Operator [SEL_161] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_165] (rows=1 width=256) + Group By Operator [GBY_160] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_37] Group By Operator [GBY_36] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)"] - Merge Join Operator [MERGEJOIN_121] (rows=8066665 width=1014) - Conds:RS_32._col4=RS_156._col0(Inner),Output:["_col5","_col7","_col8"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - PartitionCols:_col0 - Select Operator [SEL_155] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_154] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col4 - Filter Operator [FIL_31] (rows=7333332 width=1014) - predicate:((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_120] (rows=22000000 width=1014) - Conds:RS_28._col3=RS_148._col0(Inner),Output:["_col4","_col5","_col7","_col8","_col9","_col18"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] - PartitionCols:_col0 - Select Operator [SEL_147] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col3 - Filter Operator [FIL_27] (rows=10647918 width=88) - predicate:(((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) - Merge Join Operator [MERGEJOIN_119] (rows=255550079 width=88) - Conds:RS_24._col2=RS_140._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col16"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=7200 width=107) - Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=7200 width=107) - predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_118] (rows=232318249 width=88) - Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - PartitionCols:_col0 - Select Operator [SEL_131] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_130] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=211198404 width=88) - Conds:RS_164._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - PartitionCols:_col0 - Select Operator [SEL_123] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_122] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] - PartitionCols:_col0 - Select Operator [SEL_163] (rows=191998545 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_162] (rows=191998545 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_29_customer_address_ca_address_sk_min) AND DynamicValue(RS_29_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_29_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_33_store_s_store_sk_min) AND DynamicValue(RS_33_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_33_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_133] (rows=1861800 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=7200 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_153] - Group By Operator [GBY_152] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_149] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_147] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_157] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"] + Select Operator [SEL_35] (rows=1431552 width=88) + Output:["_col6","_col8","_col9"] + Filter Operator [FIL_34] (rows=1431552 width=88) + predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) + Merge Join Operator [MERGEJOIN_121] (rows=34357287 width=88) + Conds:RS_31._col2=RS_148._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_146] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_15] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col2 + Filter Operator [FIL_30] (rows=31233897 width=88) + predicate:((((_col16 = 'KY') or (_col16 = 'GA') or (_col16 = 'NM')) and _col10 BETWEEN 100 AND 200) or (((_col16 = 'MT') or (_col16 = 'OR') or (_col16 = 'IN')) and _col10 BETWEEN 150 AND 300) or (((_col16 = 'WI') or (_col16 = 'MO') or (_col16 = 'WV')) and _col10 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_120] (rows=93701693 width=88) + Conds:RS_27._col4=RS_159._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_119] (rows=85183356 width=88) + Conds:RS_24._col3=RS_140._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=7200 width=107) + Output:["_col0","_col1"] + Filter Operator [FIL_138] (rows=7200 width=107) + predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) + TableScan [TS_9] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_118] (rows=77439413 width=88) + Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_130] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_117] (rows=70399465 width=88) + Conds:RS_124._col0=RS_156._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_122] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col4 + Select Operator [SEL_155] (rows=63999513 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_154] (rows=63999513 width=88) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_133] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_131] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_141] (rows=7200 width=107) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_139] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] + Select Operator [SEL_149] (rows=1861800 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_147] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_125] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_123] diff --git a/ql/src/test/results/clientpositive/perf/tez/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out index 0ba3fbf98d..f9c21aa376 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -152,30 +152,30 @@ Stage-0 Select Operator [SEL_328] (rows=63887519 width=88) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_327] (rows=63887519 width=88) - predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) PTF Operator [PTF_326] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col4, _col3, _col5, _col6"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5, _col6"}] Select Operator [SEL_325] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_324] - PartitionCols:_col3, _col2, _col4, _col5 + PartitionCols:_col1, _col0, _col4, _col5 Select Operator [SEL_323] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_322] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5, _col0"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col5, _col2"}] Select Operator [SEL_321] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_310] - PartitionCols:_col3, _col2, _col4, _col5, _col0 + PartitionCols:_col1, _col0, _col4, _col5, _col2 Group By Operator [GBY_307] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_92] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col9, _col11, _col12 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col5, _col6, _col11, _col12 Merge Join Operator [MERGEJOIN_278] (rows=766650239 width=88) Conds:RS_88._col2=RS_298._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] <-Map 16 [SIMPLE_EDGE] vectorized @@ -212,7 +212,7 @@ Stage-0 Select Operator [SEL_281] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] Filter Operator [FIL_280] (rows=73049 width=1119) - predicate:(((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) TableScan [TS_73] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -265,12 +265,12 @@ Stage-0 Filter Operator [FIL_313] (rows=383325119 width=88) predicate:rank_window_0 is not null PTF Operator [PTF_312] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col3, _col2, _col4, _col5"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] Select Operator [SEL_311] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_308] - PartitionCols:_col3, _col2, _col4, _col5 + PartitionCols:_col1, _col0, _col4, _col5 Please refer to the previous Group By Operator [GBY_307] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_320] @@ -280,11 +280,11 @@ Stage-0 Filter Operator [FIL_318] (rows=383325119 width=88) predicate:rank_window_0 is not null PTF Operator [PTF_317] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col3, _col2, _col4, _col5"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] Select Operator [SEL_316] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_309] - PartitionCols:_col3, _col2, _col4, _col5 + PartitionCols:_col1, _col0, _col4, _col5 Please refer to the previous Group By Operator [GBY_307] diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index 24ad2da1bb..6def223813 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,135 +143,125 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_133] - Group By Operator [GBY_132] (rows=1 width=8) + File Output Operator [FS_128] + Group By Operator [GBY_127] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col4)"] - Merge Join Operator [MERGEJOIN_96] (rows=93701696 width=88) - Conds:RS_25._col3=RS_123._col0(Inner),Output:["_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - PartitionCols:_col0 - Select Operator [SEL_122] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_121] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_12] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col3 - Filter Operator [FIL_24] (rows=85183359 width=88) - predicate:((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_95] (rows=255550079 width=88) - Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col4","_col6","_col13"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] - PartitionCols:_col0 - Select Operator [SEL_114] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_113] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_9] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=232318249 width=88) - Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - PartitionCols:_col0 - Select Operator [SEL_106] (rows=465450 width=385) - Output:["_col0"] - Filter Operator [FIL_105] (rows=465450 width=385) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=211198404 width=88) - Conds:RS_131._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_99] - PartitionCols:_col0 - Select Operator [SEL_98] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_97] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - PartitionCols:_col0 - Select Operator [SEL_130] (rows=191998545 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Filter Operator [FIL_129] (rows=191998545 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_19_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_19_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_19_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_26_store_s_store_sk_min) AND DynamicValue(RS_26_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_26_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=465450 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - Group By Operator [GBY_117] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_116] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_114] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_124] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + Output:["_col0"],aggregations:["sum(_col5)"] + Select Operator [SEL_28] (rows=31233897 width=88) + Output:["_col5"] + Filter Operator [FIL_27] (rows=31233897 width=88) + predicate:((((_col14 = 'KY') or (_col14 = 'GA') or (_col14 = 'NM')) and _col7 BETWEEN 0 AND 2000) or (((_col14 = 'MT') or (_col14 = 'OR') or (_col14 = 'IN')) and _col7 BETWEEN 150 AND 3000) or (((_col14 = 'WI') or (_col14 = 'MO') or (_col14 = 'WV')) and _col7 BETWEEN 50 AND 25000)) + Merge Join Operator [MERGEJOIN_96] (rows=93701693 width=88) + Conds:RS_24._col3=RS_126._col0(Inner),Output:["_col5","_col7","_col14"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_95] (rows=85183356 width=88) + Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col5","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + PartitionCols:_col0 + Select Operator [SEL_114] (rows=465450 width=385) + Output:["_col0"] + Filter Operator [FIL_113] (rows=465450 width=385) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) + TableScan [TS_9] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_94] (rows=77439413 width=88) + Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col5","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_105] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=70399465 width=88) + Conds:RS_99._col0=RS_123._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_97] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col3 + Select Operator [SEL_122] (rows=63999513 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + Filter Operator [FIL_121] (rows=63999513 width=88) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_116] (rows=465450 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_100] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_98] diff --git a/ql/src/test/results/clientpositive/perf/tez/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out index fec6b41cfb..fe169cba21 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query53.q.out @@ -140,7 +140,7 @@ Stage-0 Select Operator [SEL_86] (rows=462000 width=1436) Output:["_col0","_col4"] Filter Operator [FIL_85] (rows=462000 width=1436) - predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) + predicate:(((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out index 1d1f870253..7299f9f698 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -146,30 +146,30 @@ Stage-0 Select Operator [SEL_328] (rows=31942874 width=135) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Filter Operator [FIL_327] (rows=31942874 width=135) - predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) + predicate:((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) PTF Operator [PTF_326] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col5, _col4, _col3"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5"}] Select Operator [SEL_325] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_324] - PartitionCols:_col4, _col3, _col2 + PartitionCols:_col1, _col0, _col4 Select Operator [SEL_323] (rows=191657247 width=135) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] PTF Operator [PTF_322] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2, _col0"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col2"}] Select Operator [SEL_321] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_310] - PartitionCols:_col4, _col3, _col2, _col0 + PartitionCols:_col1, _col0, _col4, _col2 Group By Operator [GBY_307] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_92] (rows=383314495 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col10, _col11 + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col10, _col11, _col5, _col6, _col8 Merge Join Operator [MERGEJOIN_278] (rows=383314495 width=135) Conds:RS_88._col2=RS_298._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] <-Map 16 [SIMPLE_EDGE] vectorized @@ -206,7 +206,7 @@ Stage-0 Select Operator [SEL_281] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] Filter Operator [FIL_280] (rows=73049 width=1119) - predicate:(((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) + predicate:(((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) TableScan [TS_73] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -259,12 +259,12 @@ Stage-0 Filter Operator [FIL_313] (rows=191657247 width=135) predicate:rank_window_0 is not null PTF Operator [PTF_312] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col4, _col3, _col2"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] Select Operator [SEL_311] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_308] - PartitionCols:_col4, _col3, _col2 + PartitionCols:_col1, _col0, _col4 Please refer to the previous Group By Operator [GBY_307] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_320] @@ -274,11 +274,11 @@ Stage-0 Filter Operator [FIL_318] (rows=191657247 width=135) predicate:rank_window_0 is not null PTF Operator [PTF_317] (rows=191657247 width=135) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col4, _col3, _col2"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] Select Operator [SEL_316] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_309] - PartitionCols:_col4, _col3, _col2 + PartitionCols:_col1, _col0, _col4 Please refer to the previous Group By Operator [GBY_307] diff --git a/ql/src/test/results/clientpositive/perf/tez/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out index 941ee5eb2d..349b6cdbc2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query63.q.out @@ -142,7 +142,7 @@ Stage-0 Select Operator [SEL_86] (rows=462000 width=1436) Output:["_col0","_col4"] Filter Operator [FIL_85] (rows=462000 width=1436) - predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) + predicate:(((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] <-Map 1 [SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index 5e2af5a0c6..3af8176f51 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,16 +183,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 11 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 7 <- Map 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -202,146 +202,144 @@ Stage-0 Stage-1 Reducer 10 vectorized File Output Operator [FS_244] - Limit [LIM_243] (rows=100 width=1014) + Limit [LIM_243] (rows=100 width=385) Number of rows:100 - Select Operator [SEL_242] (rows=3666666 width=1014) + Select Operator [SEL_242] (rows=1023990 width=385) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=3666666 width=1014) + Select Operator [SEL_240] (rows=1023990 width=385) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=3666666 width=1014) + Group By Operator [GBY_239] (rows=1023990 width=385) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=7333332 width=1014) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col19 - Select Operator [SEL_47] (rows=7333332 width=1014) - Output:["_col6","_col7","_col12","_col19"] - Filter Operator [FIL_46] (rows=7333332 width=1014) - predicate:((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_206] (rows=22000000 width=1014) - Conds:RS_43._col2=RS_238._col0(Inner),Output:["_col6","_col7","_col12","_col14","_col19","_col27"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_237] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_236] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_21] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_205] (rows=7086373 width=135) - Conds:RS_40._col3, _col21, _col22=RS_234._col0, _col1, _col2(Inner),Output:["_col2","_col6","_col7","_col12","_col14","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_233] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_232] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_18] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col3, _col21, _col22 - Filter Operator [FIL_39] (rows=6442158 width=135) - predicate:(((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_204] (rows=77305913 width=135) - Conds:RS_36._col1=RS_235._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19","_col21","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_233] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_203] (rows=70278102 width=135) - Conds:RS_33._col4=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=72 width=200) - Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_12] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) - Conds:RS_30._col10=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col12","_col13","_col14"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_218] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_9] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) - Conds:RS_27._col8=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col10","_col12","_col13","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_210] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) - Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col1, _col3 - Select Operator [SEL_227] (rows=48000888 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_226] (rows=48000888 width=135) - predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_31_web_page_wp_web_page_sk_min) AND DynamicValue(RS_31_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_31_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=4602 width=585) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Group By Operator [GBY_48] (rows=2047980 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","count(_col5)","sum(_col17)","count(_col17)","sum(_col16)","count(_col16)"],keys:_col19 + Merge Join Operator [MERGEJOIN_206] (rows=2047980 width=385) + Conds:RS_44._col13, _col24, _col25=RS_237._col0, _col1, _col2(Inner),Output:["_col5","_col16","_col17","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_236] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_235] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_21] (rows=1861800 width=385) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col13, _col24, _col25 + Filter Operator [FIL_43] (rows=787374 width=135) + predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col6 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col6 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_205] (rows=9448497 width=135) + Conds:RS_40._col11=RS_238._col0(Inner),Output:["_col5","_col6","_col13","_col16","_col17","_col19","_col24","_col25"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_236] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col11 + Filter Operator [FIL_39] (rows=8589543 width=135) + predicate:((((_col21 = 'KY') or (_col21 = 'GA') or (_col21 = 'NM')) and _col7 BETWEEN 100 AND 200) or (((_col21 = 'MT') or (_col21 = 'OR') or (_col21 = 'IN')) and _col7 BETWEEN 150 AND 300) or (((_col21 = 'WI') or (_col21 = 'MO') or (_col21 = 'WV')) and _col7 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_204] (rows=25768635 width=135) + Conds:RS_36._col12=RS_234._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col19","_col21"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_232] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_15] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col12 + Merge Join Operator [MERGEJOIN_203] (rows=23426032 width=135) + Conds:RS_33._col14=RS_231._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col16","_col17","_col19"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_229] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_12] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_202] (rows=21296393 width=135) + Conds:RS_30._col2, _col4=RS_228._col0, _col5(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col16","_col17"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0, _col5 + Select Operator [SEL_227] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_226] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_9] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col2, _col4 + Merge Join Operator [MERGEJOIN_201] (rows=19360357 width=135) + Conds:RS_27._col1=RS_217._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_215] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_200] (rows=17600325 width=135) + Conds:RS_209._col0=RS_225._col2(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_207] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_0] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col2 + Select Operator [SEL_224] (rows=16000296 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_223] (rows=16000296 width=135) + predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_24_web_page_wp_web_page_sk_min) AND DynamicValue(RS_24_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_24_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_3] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_210] (rows=4602 width=585) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_218] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] diff --git a/ql/src/test/results/clientpositive/perf/tez/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/query88.q.out index f515888aed..2d467f80da 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query88.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query88.q.out @@ -295,37 +295,37 @@ Stage-0 SHUFFLE [RS_44] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_567] (rows=696954748 width=88) - Conds:RS_41._col1=RS_642._col0(Inner),Output:["_col2"] + Conds:RS_41._col0=RS_642._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_642] PartitionCols:_col0 - Select Operator [SEL_639] (rows=3600 width=107) + Select Operator [SEL_633] (rows=14400 width=471) Output:["_col0"] - Filter Operator [FIL_638] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_6] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + Filter Operator [FIL_625] (rows=14400 width=471) + predicate:((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) + TableScan [TS_6] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_41] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_566] (rows=633595212 width=88) - Conds:RS_723._col0=RS_606._col0(Inner),Output:["_col1","_col2"] + Conds:RS_723._col1=RS_592._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_606] + SHUFFLE [RS_592] PartitionCols:_col0 - Select Operator [SEL_597] (rows=14400 width=471) + Select Operator [SEL_589] (rows=2000 width=107) Output:["_col0"] - Filter Operator [FIL_589] (rows=14400 width=471) - predicate:((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_3] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + Filter Operator [FIL_588] (rows=2000 width=107) + predicate:((((hd_dep_count = 3) and hd_vehicle_count is not null) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and (hd_dep_count) IN (3, 0, 1) and (hd_vehicle_count <= 5) and hd_demo_sk is not null) + TableScan [TS_3] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Map 62 [SIMPLE_EDGE] vectorized SHUFFLE [RS_723] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_722] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_721] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_42_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_42_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_42_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_39_time_dim_t_time_sk_min) AND DynamicValue(RS_39_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_39_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_39_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_39_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_39_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_42_time_dim_t_time_sk_min) AND DynamicValue(RS_42_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_42_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_26] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -333,12 +333,12 @@ Stage-0 Group By Operator [GBY_715] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_629] - Group By Operator [GBY_621] (rows=1 width=12) + SHUFFLE [RS_615] + Group By Operator [GBY_607] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_607] (rows=14400 width=471) + Select Operator [SEL_593] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_597] + Please refer to the previous Select Operator [SEL_589] <-Reducer 46 [BROADCAST_EDGE] vectorized BROADCAST [RS_718] Group By Operator [GBY_717] (rows=1 width=12) @@ -347,9 +347,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_665] Group By Operator [GBY_657] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_643] (rows=3600 width=107) + Select Operator [SEL_643] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_633] <-Reducer 55 [BROADCAST_EDGE] vectorized BROADCAST [RS_720] Group By Operator [GBY_719] (rows=1 width=12) @@ -379,31 +379,31 @@ Stage-0 SHUFFLE [RS_70] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_570] (rows=696954748 width=88) - Conds:RS_67._col1=RS_644._col0(Inner),Output:["_col2"] + Conds:RS_67._col0=RS_644._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_644] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_634] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_626] (rows=14400 width=471) + predicate:((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_67] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_569] (rows=633595212 width=88) - Conds:RS_734._col0=RS_608._col0(Inner),Output:["_col1","_col2"] + Conds:RS_734._col1=RS_594._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_608] + SHUFFLE [RS_594] PartitionCols:_col0 - Select Operator [SEL_598] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_590] (rows=14400 width=471) - predicate:((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 63 [SIMPLE_EDGE] vectorized SHUFFLE [RS_734] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_733] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_732] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_68_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_68_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_68_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_65_time_dim_t_time_sk_min) AND DynamicValue(RS_65_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_65_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_71_store_s_store_sk_min) AND DynamicValue(RS_71_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_71_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_65_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_65_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_65_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_68_time_dim_t_time_sk_min) AND DynamicValue(RS_68_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_68_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_71_store_s_store_sk_min) AND DynamicValue(RS_71_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_71_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_52] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 18 [BROADCAST_EDGE] vectorized @@ -411,12 +411,12 @@ Stage-0 Group By Operator [GBY_726] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_630] - Group By Operator [GBY_622] (rows=1 width=12) + SHUFFLE [RS_616] + Group By Operator [GBY_608] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_609] (rows=14400 width=471) + Select Operator [SEL_595] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_598] + Please refer to the previous Select Operator [SEL_589] <-Reducer 47 [BROADCAST_EDGE] vectorized BROADCAST [RS_729] Group By Operator [GBY_728] (rows=1 width=12) @@ -425,9 +425,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_666] Group By Operator [GBY_658] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_645] (rows=3600 width=107) + Select Operator [SEL_645] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_634] <-Reducer 56 [BROADCAST_EDGE] vectorized BROADCAST [RS_731] Group By Operator [GBY_730] (rows=1 width=12) @@ -457,31 +457,31 @@ Stage-0 SHUFFLE [RS_96] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_573] (rows=696954748 width=88) - Conds:RS_93._col1=RS_646._col0(Inner),Output:["_col2"] + Conds:RS_93._col0=RS_646._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_646] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_635] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_627] (rows=14400 width=471) + predicate:((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_93] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_572] (rows=633595212 width=88) - Conds:RS_745._col0=RS_610._col0(Inner),Output:["_col1","_col2"] + Conds:RS_745._col1=RS_596._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_610] + SHUFFLE [RS_596] PartitionCols:_col0 - Select Operator [SEL_599] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_591] (rows=14400 width=471) - predicate:((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 64 [SIMPLE_EDGE] vectorized SHUFFLE [RS_745] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_744] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_743] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_94_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_94_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_94_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_91_time_dim_t_time_sk_min) AND DynamicValue(RS_91_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_91_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_97_store_s_store_sk_min) AND DynamicValue(RS_97_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_97_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_91_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_91_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_91_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_94_time_dim_t_time_sk_min) AND DynamicValue(RS_94_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_94_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_97_store_s_store_sk_min) AND DynamicValue(RS_97_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_97_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_78] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 23 [BROADCAST_EDGE] vectorized @@ -489,12 +489,12 @@ Stage-0 Group By Operator [GBY_737] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_631] - Group By Operator [GBY_623] (rows=1 width=12) + SHUFFLE [RS_617] + Group By Operator [GBY_609] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_611] (rows=14400 width=471) + Select Operator [SEL_597] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_599] + Please refer to the previous Select Operator [SEL_589] <-Reducer 48 [BROADCAST_EDGE] vectorized BROADCAST [RS_740] Group By Operator [GBY_739] (rows=1 width=12) @@ -503,9 +503,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_667] Group By Operator [GBY_659] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_647] (rows=3600 width=107) + Select Operator [SEL_647] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_635] <-Reducer 57 [BROADCAST_EDGE] vectorized BROADCAST [RS_742] Group By Operator [GBY_741] (rows=1 width=12) @@ -535,31 +535,31 @@ Stage-0 SHUFFLE [RS_122] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_576] (rows=696954748 width=88) - Conds:RS_119._col1=RS_648._col0(Inner),Output:["_col2"] + Conds:RS_119._col0=RS_648._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_648] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_636] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_628] (rows=14400 width=471) + predicate:((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_119] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_575] (rows=633595212 width=88) - Conds:RS_756._col0=RS_612._col0(Inner),Output:["_col1","_col2"] + Conds:RS_756._col1=RS_598._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_612] + SHUFFLE [RS_598] PartitionCols:_col0 - Select Operator [SEL_600] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_592] (rows=14400 width=471) - predicate:((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 65 [SIMPLE_EDGE] vectorized SHUFFLE [RS_756] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_755] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_754] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_120_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_120_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_120_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_117_time_dim_t_time_sk_min) AND DynamicValue(RS_117_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_117_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_123_store_s_store_sk_min) AND DynamicValue(RS_123_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_123_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_117_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_117_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_117_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_120_time_dim_t_time_sk_min) AND DynamicValue(RS_120_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_120_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_123_store_s_store_sk_min) AND DynamicValue(RS_123_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_123_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_104] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 28 [BROADCAST_EDGE] vectorized @@ -567,12 +567,12 @@ Stage-0 Group By Operator [GBY_748] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_632] - Group By Operator [GBY_624] (rows=1 width=12) + SHUFFLE [RS_618] + Group By Operator [GBY_610] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_613] (rows=14400 width=471) + Select Operator [SEL_599] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_600] + Please refer to the previous Select Operator [SEL_589] <-Reducer 49 [BROADCAST_EDGE] vectorized BROADCAST [RS_751] Group By Operator [GBY_750] (rows=1 width=12) @@ -581,9 +581,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_668] Group By Operator [GBY_660] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_649] (rows=3600 width=107) + Select Operator [SEL_649] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_636] <-Reducer 58 [BROADCAST_EDGE] vectorized BROADCAST [RS_753] Group By Operator [GBY_752] (rows=1 width=12) @@ -613,31 +613,31 @@ Stage-0 SHUFFLE [RS_148] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_579] (rows=696954748 width=88) - Conds:RS_145._col1=RS_650._col0(Inner),Output:["_col2"] + Conds:RS_145._col0=RS_650._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_650] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_637] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_629] (rows=14400 width=471) + predicate:((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 29 [SIMPLE_EDGE] SHUFFLE [RS_145] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_578] (rows=633595212 width=88) - Conds:RS_767._col0=RS_614._col0(Inner),Output:["_col1","_col2"] + Conds:RS_767._col1=RS_600._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_614] + SHUFFLE [RS_600] PartitionCols:_col0 - Select Operator [SEL_601] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_593] (rows=14400 width=471) - predicate:((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 66 [SIMPLE_EDGE] vectorized SHUFFLE [RS_767] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_766] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_765] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_146_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_146_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_146_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_143_time_dim_t_time_sk_min) AND DynamicValue(RS_143_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_143_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_149_store_s_store_sk_min) AND DynamicValue(RS_149_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_149_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_143_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_143_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_143_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_146_time_dim_t_time_sk_min) AND DynamicValue(RS_146_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_146_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_149_store_s_store_sk_min) AND DynamicValue(RS_149_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_149_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_130] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 33 [BROADCAST_EDGE] vectorized @@ -645,12 +645,12 @@ Stage-0 Group By Operator [GBY_759] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_633] - Group By Operator [GBY_625] (rows=1 width=12) + SHUFFLE [RS_619] + Group By Operator [GBY_611] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_615] (rows=14400 width=471) + Select Operator [SEL_601] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_601] + Please refer to the previous Select Operator [SEL_589] <-Reducer 50 [BROADCAST_EDGE] vectorized BROADCAST [RS_762] Group By Operator [GBY_761] (rows=1 width=12) @@ -659,9 +659,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_669] Group By Operator [GBY_661] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_651] (rows=3600 width=107) + Select Operator [SEL_651] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_637] <-Reducer 59 [BROADCAST_EDGE] vectorized BROADCAST [RS_764] Group By Operator [GBY_763] (rows=1 width=12) @@ -691,31 +691,31 @@ Stage-0 SHUFFLE [RS_174] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_582] (rows=696954748 width=88) - Conds:RS_171._col1=RS_652._col0(Inner),Output:["_col2"] + Conds:RS_171._col0=RS_652._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_652] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_638] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_630] (rows=14400 width=471) + predicate:((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_171] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_581] (rows=633595212 width=88) - Conds:RS_778._col0=RS_616._col0(Inner),Output:["_col1","_col2"] + Conds:RS_778._col1=RS_602._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_616] + SHUFFLE [RS_602] PartitionCols:_col0 - Select Operator [SEL_602] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_594] (rows=14400 width=471) - predicate:((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 67 [SIMPLE_EDGE] vectorized SHUFFLE [RS_778] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_777] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_776] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_172_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_172_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_172_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_169_time_dim_t_time_sk_min) AND DynamicValue(RS_169_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_169_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_175_store_s_store_sk_min) AND DynamicValue(RS_175_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_175_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_169_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_169_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_169_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_172_time_dim_t_time_sk_min) AND DynamicValue(RS_172_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_172_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_175_store_s_store_sk_min) AND DynamicValue(RS_175_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_175_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_156] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 38 [BROADCAST_EDGE] vectorized @@ -723,12 +723,12 @@ Stage-0 Group By Operator [GBY_770] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_634] - Group By Operator [GBY_626] (rows=1 width=12) + SHUFFLE [RS_620] + Group By Operator [GBY_612] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_617] (rows=14400 width=471) + Select Operator [SEL_603] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_602] + Please refer to the previous Select Operator [SEL_589] <-Reducer 51 [BROADCAST_EDGE] vectorized BROADCAST [RS_773] Group By Operator [GBY_772] (rows=1 width=12) @@ -737,9 +737,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_670] Group By Operator [GBY_662] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_653] (rows=3600 width=107) + Select Operator [SEL_653] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_638] <-Reducer 60 [BROADCAST_EDGE] vectorized BROADCAST [RS_775] Group By Operator [GBY_774] (rows=1 width=12) @@ -769,31 +769,31 @@ Stage-0 SHUFFLE [RS_200] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_585] (rows=696954748 width=88) - Conds:RS_197._col1=RS_654._col0(Inner),Output:["_col2"] + Conds:RS_197._col0=RS_654._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_654] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_639] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_631] (rows=14400 width=471) + predicate:((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 39 [SIMPLE_EDGE] SHUFFLE [RS_197] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_584] (rows=633595212 width=88) - Conds:RS_789._col0=RS_618._col0(Inner),Output:["_col1","_col2"] + Conds:RS_789._col1=RS_604._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_618] + SHUFFLE [RS_604] PartitionCols:_col0 - Select Operator [SEL_603] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_595] (rows=14400 width=471) - predicate:((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 68 [SIMPLE_EDGE] vectorized SHUFFLE [RS_789] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_788] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_787] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_198_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_198_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_198_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_195_time_dim_t_time_sk_min) AND DynamicValue(RS_195_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_195_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_201_store_s_store_sk_min) AND DynamicValue(RS_201_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_201_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_195_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_195_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_195_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_198_time_dim_t_time_sk_min) AND DynamicValue(RS_198_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_198_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_201_store_s_store_sk_min) AND DynamicValue(RS_201_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_201_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_182] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 43 [BROADCAST_EDGE] vectorized @@ -801,12 +801,12 @@ Stage-0 Group By Operator [GBY_781] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_635] - Group By Operator [GBY_627] (rows=1 width=12) + SHUFFLE [RS_621] + Group By Operator [GBY_613] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_619] (rows=14400 width=471) + Select Operator [SEL_605] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_603] + Please refer to the previous Select Operator [SEL_589] <-Reducer 52 [BROADCAST_EDGE] vectorized BROADCAST [RS_784] Group By Operator [GBY_783] (rows=1 width=12) @@ -815,7 +815,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_671] Group By Operator [GBY_663] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_655] (rows=3600 width=107) + Select Operator [SEL_655] (rows=14400 width=471) Output:["_col0"] Please refer to the previous Select Operator [SEL_639] <-Reducer 61 [BROADCAST_EDGE] vectorized @@ -847,31 +847,31 @@ Stage-0 SHUFFLE [RS_18] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_564] (rows=696954748 width=88) - Conds:RS_15._col1=RS_640._col0(Inner),Output:["_col2"] + Conds:RS_15._col0=RS_640._col0(Inner),Output:["_col2"] <-Map 44 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_640] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_639] + Select Operator [SEL_632] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_624] (rows=14400 width=471) + predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_563] (rows=633595212 width=88) - Conds:RS_712._col0=RS_604._col0(Inner),Output:["_col1","_col2"] + Conds:RS_712._col1=RS_590._col0(Inner),Output:["_col0","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_604] + SHUFFLE [RS_590] PartitionCols:_col0 - Select Operator [SEL_596] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_588] (rows=14400 width=471) - predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous Select Operator [SEL_589] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_712] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_711] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_710] (rows=575995635 width=88) - predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_16_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_16_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_16_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_13_time_dim_t_time_sk_min) AND DynamicValue(RS_13_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_13_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_13_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_13_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_13_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_time_sk BETWEEN DynamicValue(RS_16_time_dim_t_time_sk_min) AND DynamicValue(RS_16_time_dim_t_time_sk_max) and in_bloom_filter(ss_sold_time_sk, DynamicValue(RS_16_time_dim_t_time_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] <-Reducer 45 [BROADCAST_EDGE] vectorized @@ -882,9 +882,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_664] Group By Operator [GBY_656] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_641] (rows=3600 width=107) + Select Operator [SEL_641] (rows=14400 width=471) Output:["_col0"] - Please refer to the previous Select Operator [SEL_639] + Please refer to the previous Select Operator [SEL_632] <-Reducer 54 [BROADCAST_EDGE] vectorized BROADCAST [RS_709] Group By Operator [GBY_708] (rows=1 width=12) @@ -897,14 +897,14 @@ Stage-0 Output:["_col0"] Please refer to the previous Select Operator [SEL_675] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_637] - Group By Operator [GBY_636] (rows=1 width=12) + BROADCAST [RS_623] + Group By Operator [GBY_622] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_628] - Group By Operator [GBY_620] (rows=1 width=12) + SHUFFLE [RS_614] + Group By Operator [GBY_606] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_605] (rows=14400 width=471) + Select Operator [SEL_591] (rows=2000 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_596] + Please refer to the previous Select Operator [SEL_589] diff --git a/ql/src/test/results/clientpositive/perf/tez/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/query89.q.out index 29db8145ec..9c306fae3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query89.q.out @@ -95,21 +95,21 @@ Stage-0 Select Operator [SEL_116] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_115] (rows=383325119 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col3, _col1, _col4, _col5"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] Select Operator [SEL_114] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_113] - PartitionCols:_col3, _col1, _col4, _col5 + PartitionCols:_col2, _col0, _col4, _col5 Group By Operator [GBY_112] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_22] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col6, _col8, _col9, _col10, _col12, _col13 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13 Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col6","_col8","_col9","_col10","_col12","_col13"] + Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_103] PartitionCols:_col0 @@ -123,37 +123,37 @@ Stage-0 SHUFFLE [RS_18] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) - Conds:RS_15._col1=RS_95._col0(Inner),Output:["_col2","_col3","_col6","_col8","_col9","_col10"] + Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_93] (rows=462000 width=1436) - predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] + Select Operator [SEL_94] (rows=36524 width=1119) + Output:["_col0","_col2"] + Filter Operator [FIL_93] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] - PartitionCols:_col1 + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col6"] + Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=36524 width=1119) - Output:["_col0","_col2"] - Filter Operator [FIL_85] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + Select Operator [SEL_86] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_85] (rows=462000 width=1436) + predicate:(((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and (i_category) IN ('Home', 'Books', 'Electronics', 'Shoes', 'Jewelry', 'Men') and (i_class) IN ('wallpaper', 'parenting', 'musical', 'womens', 'birdal', 'pants') and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] - PartitionCols:_col0 + PartitionCols:_col1 Select Operator [SEL_110] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized @@ -164,7 +164,7 @@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=462000 width=1436) + Select Operator [SEL_96] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -186,7 +186,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=36524 width=1119) + Select Operator [SEL_88] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_86]