diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 5d90c87a67..33205a594f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; -import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -32,8 +31,6 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; @@ -51,13 +48,7 @@ protected static final Logger LOG = LoggerFactory.getLogger(HivePreFilteringRule.class); - private static final Set COMPARISON = EnumSet.of(SqlKind.EQUALS, - SqlKind.GREATER_THAN_OR_EQUAL, - SqlKind.LESS_THAN_OR_EQUAL, - SqlKind.GREATER_THAN, SqlKind.LESS_THAN, - SqlKind.NOT_EQUALS); - - private final FilterFactory filterFactory; + private final FilterFactory filterFactory; // Max number of nodes when converting to CNF private final int maxCNFNodeCount; @@ -120,7 +111,7 @@ public void onMatch(RelOptRuleCall call) { for (RexNode operand : operands) { if (operand.getKind() == SqlKind.OR) { - extractedCommonOperands = extractCommonOperands(rexBuilder, operand, maxCNFNodeCount); + extractedCommonOperands = extractCommonOperands(rexBuilder, filter.getInput(), operand, maxCNFNodeCount); for (RexNode extractedExpr : extractedCommonOperands) { if (operandsToPushDownDigest.add(extractedExpr.toString())) { operandsToPushDown.add(extractedExpr); @@ -155,7 +146,7 @@ public void onMatch(RelOptRuleCall call) { break; case OR: - operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount); + operandsToPushDown = extractCommonOperands(rexBuilder, filter.getInput(), topFilterCondition, maxCNFNodeCount); break; default: return; @@ -191,8 +182,8 @@ public void onMatch(RelOptRuleCall call) { } - private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition, - int maxCNFNodeCount) { + private static List extractCommonOperands(RexBuilder rexBuilder, RelNode input, + RexNode condition, int maxCNFNodeCount) { assert condition.getKind() == SqlKind.OR; Multimap reductionCondition = LinkedHashMultimap.create(); @@ -216,27 +207,12 @@ public void onMatch(RelOptRuleCall call) { return new ArrayList<>(); } RexCall conjCall = (RexCall) conjunction; - RexNode ref = null; - if (COMPARISON.contains(conjCall.getOperator().getKind())) { - if (conjCall.operands.get(0) instanceof RexInputRef - && conjCall.operands.get(1) instanceof RexLiteral) { - ref = conjCall.operands.get(0); - } else if (conjCall.operands.get(1) instanceof RexInputRef - && conjCall.operands.get(0) instanceof RexLiteral) { - ref = conjCall.operands.get(1); - } else { - // We do not know what it is, we bail out for safety - return new ArrayList<>(); - } - } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) { - ref = conjCall.operands.get(0); - } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { - ref = conjCall.operands.get(1); - } else { + Set refs = HiveCalciteUtil.getInputRefs(conjCall); + if (refs.size() != 1) { // We do not know what it is, we bail out for safety return new ArrayList<>(); } - + RexNode ref = rexBuilder.makeInputRef(input, refs.iterator().next()); String stringRef = ref.toString(); reductionCondition.put(stringRef, conjCall); refsInCurrentOperand.add(stringRef); diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out index ffef171259..7c8827e280 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -115,166 +115,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_167] - Select Operator [SEL_166] (rows=1 width=256) + File Output Operator [FS_162] + Select Operator [SEL_161] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_165] (rows=1 width=256) + Group By Operator [GBY_160] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_37] Group By Operator [GBY_36] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)"] - Merge Join Operator [MERGEJOIN_121] (rows=8066665 width=1014) - Conds:RS_32._col4=RS_156._col0(Inner),Output:["_col5","_col7","_col8"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - PartitionCols:_col0 - Select Operator [SEL_155] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_154] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col4 - Filter Operator [FIL_31] (rows=7333332 width=1014) - predicate:((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_120] (rows=22000000 width=1014) - Conds:RS_28._col3=RS_148._col0(Inner),Output:["_col4","_col5","_col7","_col8","_col9","_col18"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] - PartitionCols:_col0 - Select Operator [SEL_147] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col3 - Filter Operator [FIL_27] (rows=10647918 width=88) - predicate:(((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) - Merge Join Operator [MERGEJOIN_119] (rows=255550079 width=88) - Conds:RS_24._col2=RS_140._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col16"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=7200 width=107) - Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=7200 width=107) - predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_118] (rows=232318249 width=88) - Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - PartitionCols:_col0 - Select Operator [SEL_131] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_130] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=211198404 width=88) - Conds:RS_164._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - PartitionCols:_col0 - Select Operator [SEL_123] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_122] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] - PartitionCols:_col0 - Select Operator [SEL_163] (rows=191998545 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_162] (rows=191998545 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_29_customer_address_ca_address_sk_min) AND DynamicValue(RS_29_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_29_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_33_store_s_store_sk_min) AND DynamicValue(RS_33_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_33_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_133] (rows=1861800 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=7200 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_153] - Group By Operator [GBY_152] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_149] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_147] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_157] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"] + Select Operator [SEL_35] (rows=1431552 width=88) + Output:["_col6","_col8","_col9"] + Filter Operator [FIL_34] (rows=1431552 width=88) + predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) + Merge Join Operator [MERGEJOIN_121] (rows=34357287 width=88) + Conds:RS_31._col2=RS_148._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_146] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_15] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col2 + Filter Operator [FIL_30] (rows=31233897 width=88) + predicate:((((_col16 = 'KY') or (_col16 = 'GA') or (_col16 = 'NM')) and _col10 BETWEEN 100 AND 200) or (((_col16 = 'MT') or (_col16 = 'OR') or (_col16 = 'IN')) and _col10 BETWEEN 150 AND 300) or (((_col16 = 'WI') or (_col16 = 'MO') or (_col16 = 'WV')) and _col10 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_120] (rows=93701693 width=88) + Conds:RS_27._col4=RS_159._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_119] (rows=85183356 width=88) + Conds:RS_24._col3=RS_140._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=7200 width=107) + Output:["_col0","_col1"] + Filter Operator [FIL_138] (rows=7200 width=107) + predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) + TableScan [TS_9] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_118] (rows=77439413 width=88) + Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_130] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_117] (rows=70399465 width=88) + Conds:RS_124._col0=RS_156._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_122] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col4 + Select Operator [SEL_155] (rows=63999513 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_154] (rows=63999513 width=88) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_133] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_131] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_141] (rows=7200 width=107) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_139] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] + Select Operator [SEL_149] (rows=1861800 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_147] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_125] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_123] diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index 24ad2da1bb..6def223813 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,135 +143,125 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_133] - Group By Operator [GBY_132] (rows=1 width=8) + File Output Operator [FS_128] + Group By Operator [GBY_127] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col4)"] - Merge Join Operator [MERGEJOIN_96] (rows=93701696 width=88) - Conds:RS_25._col3=RS_123._col0(Inner),Output:["_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - PartitionCols:_col0 - Select Operator [SEL_122] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_121] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_12] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col3 - Filter Operator [FIL_24] (rows=85183359 width=88) - predicate:((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_95] (rows=255550079 width=88) - Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col4","_col6","_col13"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] - PartitionCols:_col0 - Select Operator [SEL_114] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_113] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_9] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=232318249 width=88) - Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col4","_col6"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - PartitionCols:_col0 - Select Operator [SEL_106] (rows=465450 width=385) - Output:["_col0"] - Filter Operator [FIL_105] (rows=465450 width=385) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=211198404 width=88) - Conds:RS_131._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_99] - PartitionCols:_col0 - Select Operator [SEL_98] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_97] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - PartitionCols:_col0 - Select Operator [SEL_130] (rows=191998545 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Filter Operator [FIL_129] (rows=191998545 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_19_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_19_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_19_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_26_store_s_store_sk_min) AND DynamicValue(RS_26_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_26_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=465450 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - Group By Operator [GBY_117] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_116] (rows=20000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_114] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_124] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + Output:["_col0"],aggregations:["sum(_col5)"] + Select Operator [SEL_28] (rows=31233897 width=88) + Output:["_col5"] + Filter Operator [FIL_27] (rows=31233897 width=88) + predicate:((((_col14 = 'KY') or (_col14 = 'GA') or (_col14 = 'NM')) and _col7 BETWEEN 0 AND 2000) or (((_col14 = 'MT') or (_col14 = 'OR') or (_col14 = 'IN')) and _col7 BETWEEN 150 AND 3000) or (((_col14 = 'WI') or (_col14 = 'MO') or (_col14 = 'WV')) and _col7 BETWEEN 50 AND 25000)) + Merge Join Operator [MERGEJOIN_96] (rows=93701693 width=88) + Conds:RS_24._col3=RS_126._col0(Inner),Output:["_col5","_col7","_col14"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_95] (rows=85183356 width=88) + Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col5","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + PartitionCols:_col0 + Select Operator [SEL_114] (rows=465450 width=385) + Output:["_col0"] + Filter Operator [FIL_113] (rows=465450 width=385) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) + TableScan [TS_9] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_94] (rows=77439413 width=88) + Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col5","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_105] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=70399465 width=88) + Conds:RS_99._col0=RS_123._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_97] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col3 + Select Operator [SEL_122] (rows=63999513 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + Filter Operator [FIL_121] (rows=63999513 width=88) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_116] (rows=465450 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_100] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_98] diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index 5e2af5a0c6..3af8176f51 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,16 +183,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 11 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 7 <- Map 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -202,146 +202,144 @@ Stage-0 Stage-1 Reducer 10 vectorized File Output Operator [FS_244] - Limit [LIM_243] (rows=100 width=1014) + Limit [LIM_243] (rows=100 width=385) Number of rows:100 - Select Operator [SEL_242] (rows=3666666 width=1014) + Select Operator [SEL_242] (rows=1023990 width=385) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=3666666 width=1014) + Select Operator [SEL_240] (rows=1023990 width=385) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=3666666 width=1014) + Group By Operator [GBY_239] (rows=1023990 width=385) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=7333332 width=1014) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col19 - Select Operator [SEL_47] (rows=7333332 width=1014) - Output:["_col6","_col7","_col12","_col19"] - Filter Operator [FIL_46] (rows=7333332 width=1014) - predicate:((((_col27 = 'KY') or (_col27 = 'GA') or (_col27 = 'NM')) and _col14 BETWEEN 100 AND 200) or (((_col27 = 'MT') or (_col27 = 'OR') or (_col27 = 'IN')) and _col14 BETWEEN 150 AND 300) or (((_col27 = 'WI') or (_col27 = 'MO') or (_col27 = 'WV')) and _col14 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_206] (rows=22000000 width=1014) - Conds:RS_43._col2=RS_238._col0(Inner),Output:["_col6","_col7","_col12","_col14","_col19","_col27"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_237] (rows=20000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_236] (rows=20000000 width=1014) - predicate:((ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_21] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_205] (rows=7086373 width=135) - Conds:RS_40._col3, _col21, _col22=RS_234._col0, _col1, _col2(Inner),Output:["_col2","_col6","_col7","_col12","_col14","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_233] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_232] (rows=1861800 width=385) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_18] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col3, _col21, _col22 - Filter Operator [FIL_39] (rows=6442158 width=135) - predicate:(((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_204] (rows=77305913 width=135) - Conds:RS_36._col1=RS_235._col0(Inner),Output:["_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19","_col21","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_233] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_203] (rows=70278102 width=135) - Conds:RS_33._col4=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col12","_col13","_col14","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=72 width=200) - Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_12] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) - Conds:RS_30._col10=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col12","_col13","_col14"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_218] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_9] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) - Conds:RS_27._col8=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col10","_col12","_col13","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_210] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) - Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col1, _col3 - Select Operator [SEL_227] (rows=48000888 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_226] (rows=48000888 width=135) - predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_31_web_page_wp_web_page_sk_min) AND DynamicValue(RS_31_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_31_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=4602 width=585) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Group By Operator [GBY_48] (rows=2047980 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","count(_col5)","sum(_col17)","count(_col17)","sum(_col16)","count(_col16)"],keys:_col19 + Merge Join Operator [MERGEJOIN_206] (rows=2047980 width=385) + Conds:RS_44._col13, _col24, _col25=RS_237._col0, _col1, _col2(Inner),Output:["_col5","_col16","_col17","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_236] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_235] (rows=1861800 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_21] (rows=1861800 width=385) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col13, _col24, _col25 + Filter Operator [FIL_43] (rows=787374 width=135) + predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col6 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col6 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_205] (rows=9448497 width=135) + Conds:RS_40._col11=RS_238._col0(Inner),Output:["_col5","_col6","_col13","_col16","_col17","_col19","_col24","_col25"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_236] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col11 + Filter Operator [FIL_39] (rows=8589543 width=135) + predicate:((((_col21 = 'KY') or (_col21 = 'GA') or (_col21 = 'NM')) and _col7 BETWEEN 100 AND 200) or (((_col21 = 'MT') or (_col21 = 'OR') or (_col21 = 'IN')) and _col7 BETWEEN 150 AND 300) or (((_col21 = 'WI') or (_col21 = 'MO') or (_col21 = 'WV')) and _col7 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_204] (rows=25768635 width=135) + Conds:RS_36._col12=RS_234._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col19","_col21"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + PartitionCols:_col0 + Select Operator [SEL_233] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_232] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_15] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col12 + Merge Join Operator [MERGEJOIN_203] (rows=23426032 width=135) + Conds:RS_33._col14=RS_231._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col16","_col17","_col19"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_229] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_12] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col14 + Merge Join Operator [MERGEJOIN_202] (rows=21296393 width=135) + Conds:RS_30._col2, _col4=RS_228._col0, _col5(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col16","_col17"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0, _col5 + Select Operator [SEL_227] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_226] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_9] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col2, _col4 + Merge Join Operator [MERGEJOIN_201] (rows=19360357 width=135) + Conds:RS_27._col1=RS_217._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_215] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_200] (rows=17600325 width=135) + Conds:RS_209._col0=RS_225._col2(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_207] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_0] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col2 + Select Operator [SEL_224] (rows=16000296 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_223] (rows=16000296 width=135) + predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_24_web_page_wp_web_page_sk_min) AND DynamicValue(RS_24_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_24_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_3] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_210] (rows=4602 width=585) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_218] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216]