diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 02cebdc5ac..b20861b4c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -481,6 +481,12 @@ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, markSemiJoinForDPP(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); + // Remove any semi join edges from Union Op + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + removeSemiJoinEdgesForUnion(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, + "Remove any semi join edge between Union and RS"); + // Remove any parallel edge between semijoin and mapjoin. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinsParallelToMapJoin(procCtx); @@ -1313,6 +1319,56 @@ private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperato return parallelEdges; } + /* + * Given an operator this method removes all semi join edges downstream (children) until it hits RS + */ + private void removeSemiJoinEdges(Operator op, OptimizeTezProcContext procCtx, + Map sjToRemove) throws SemanticException { + if(op instanceof ReduceSinkOperator && op.getNumChild() == 0) { + Map sjMap = procCtx.parseContext.getRsToSemiJoinBranchInfo(); + if(sjMap.get(op) != null) { + sjToRemove.put((ReduceSinkOperator)op, sjMap.get(op).getTsOp()); + } + } + + for(Operator child:op.getChildOperators()) { + removeSemiJoinEdges(child, procCtx, sjToRemove); + } + } + + private void removeSemiJoinEdgesForUnion(OptimizeTezProcContext procCtx) throws SemanticException{ + // Get all the TS ops. + List> topOps = new ArrayList<>(); + topOps.addAll(procCtx.parseContext.getTopOps().values()); + Set> unionOps = new HashSet<>(); + + Map sjToRemove = new HashMap<>(); + for (Operator parent : topOps) { + Deque> deque = new LinkedList<>(); + deque.add(parent); + while (!deque.isEmpty()) { + Operator op = deque.pollLast(); + if (op instanceof UnionOperator && !unionOps.contains(op)) { + unionOps.add(op); + removeSemiJoinEdges(op, procCtx, sjToRemove); + } + deque.addAll(op.getChildOperators()); + } + } + // remove sj + if (sjToRemove.size() > 0) { + for (ReduceSinkOperator rs : sjToRemove.keySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Semijoin optimization with Union operator. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(sjToRemove.get(rs))); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, + sjToRemove.get(rs)); + } + } + } + /* * The algorithm looks at all the mapjoins in the operator pipeline until * it hits RS Op and for each mapjoin examines if it has paralllel semijoin diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index cb3740d09a..ecf6323e8e 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -596,15 +596,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 10 <- Union 2 (CONTAINS) - Map 11 <- Reducer 9 (BROADCAST_EDGE) - Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Map 9 <- Union 2 (CONTAINS) + Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -626,22 +624,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map 10 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 9 Map Operator Tree: TableScan alias: nonacidorctbl @@ -660,37 +658,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 11 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -878,18 +847,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index fd5f6d7521..50fa078159 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with my_customers as ( diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 7a0750eda3..8d10899c63 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -133,9 +133,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 18 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) -Map 25 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 18 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 24 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) @@ -144,16 +143,15 @@ Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 11 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) -Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 20 <- Map 25 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) +Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 28 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) -Reducer 4 <- Map 30 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 32 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 4 <- Map 29 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) @@ -164,25 +162,25 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_358] - Limit [LIM_357] (rows=1 width=16) + File Output Operator [FS_353] + Limit [LIM_352] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_356] (rows=1 width=16) + Select Operator [SEL_351] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] - Select Operator [SEL_354] (rows=1 width=16) + SHUFFLE [RS_350] + Select Operator [SEL_349] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_353] (rows=1 width=12) + Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + SHUFFLE [RS_347] PartitionCols:_col0 - Group By Operator [GBY_351] (rows=1 width=12) + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_350] (rows=1 width=116) + Select Operator [SEL_345] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_349] (rows=1 width=116) + Group By Operator [GBY_344] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] @@ -202,42 +200,42 @@ Stage-0 Merge Join Operator [MERGEJOIN_277] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_339] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_335] + Group By Operator [GBY_334] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_301] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) + Group By Operator [GBY_298] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) + Select Operator [SEL_295] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_312] (rows=50 width=12) + Filter Operator [FIL_293] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_26] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=8) - Filter Operator [FIL_346] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_343] + Select Operator [SEL_342] (rows=1 width=8) + Filter Operator [FIL_341] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_345] (rows=1 width=8) + Group By Operator [GBY_340] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_343] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_339] + Group By Operator [GBY_338] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_342] (rows=25 width=4) - Group By Operator [GBY_341] (rows=25 width=4) + Select Operator [SEL_337] (rows=25 width=4) + Group By Operator [GBY_336] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_302] PartitionCols:_col0 - Group By Operator [GBY_318] (rows=25 width=4) + Group By Operator [GBY_299] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_315] (rows=50 width=12) + Select Operator [SEL_296] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_312] + Please refer to the previous Filter Operator [FIL_293] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Select Operator [SEL_107] (rows=224732600 width=119) @@ -245,41 +243,41 @@ Stage-0 Merge Join Operator [MERGEJOIN_278] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col7","_col13"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_324] - Group By Operator [GBY_322] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_305] + Group By Operator [GBY_303] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_300] PartitionCols:_col0 - Group By Operator [GBY_316] (rows=25 width=4) + Group By Operator [GBY_297] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_313] (rows=50 width=12) + Select Operator [SEL_294] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_312] + Please refer to the previous Filter Operator [FIL_293] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_104] Merge Join Operator [MERGEJOIN_276] (rows=8989304 width=8) Conds:RS_101._col5=RS_102._col0(Inner),Output:["_col2","_col4","_col7"] - <-Reducer 32 [SIMPLE_EDGE] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_102] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_275] (rows=39720279 width=4) - Conds:RS_335._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col0"] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_335] + Conds:RS_330._col1, _col2=RS_333._col0, _col1(Inner),Output:["_col0"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] PartitionCols:_col1, _col2 - Select Operator [SEL_334] (rows=40000000 width=188) + Select Operator [SEL_329] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_333] (rows=40000000 width=188) + Filter Operator [FIL_328] (rows=40000000 width=188) predicate:(ca_county is not null and ca_state is not null) TableScan [TS_74] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] PartitionCols:_col0, _col1 - Select Operator [SEL_337] (rows=1704 width=184) + Select Operator [SEL_332] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_336] (rows=1704 width=184) + Filter Operator [FIL_331] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_77] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] @@ -287,11 +285,11 @@ Stage-0 SHUFFLE [RS_101] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_274] (rows=8989304 width=12) - Conds:RS_98._col0=RS_332._col0(Inner),Output:["_col2","_col4","_col5","_col7"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + Conds:RS_98._col0=RS_327._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_331] (rows=73049 width=8) + Select Operator [SEL_326] (rows=73049 width=8) Output:["_col0","_col1"] TableScan [TS_72] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] @@ -299,11 +297,37 @@ Stage-0 SHUFFLE [RS_98] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_273] (rows=8989304 width=8) - Conds:RS_95._col1=RS_303._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + Conds:RS_95._col1=RS_325._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] + Select Operator [SEL_291] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_290] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_23] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + Select Operator [SEL_310] (rows=1 width=8) + Filter Operator [FIL_309] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_308] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_307] + Group By Operator [GBY_306] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_304] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_303] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] + SHUFFLE [RS_325] PartitionCols:_col0 - Group By Operator [GBY_302] (rows=55046 width=8) + Group By Operator [GBY_324] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -311,13 +335,13 @@ Stage-0 Group By Operator [GBY_68] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col5, _col6 Merge Join Operator [MERGEJOIN_272] (rows=110092 width=8) - Conds:RS_64._col1=RS_301._col0(Inner),Output:["_col5","_col6"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + Conds:RS_64._col1=RS_323._col0(Inner),Output:["_col5","_col6"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0 - Select Operator [SEL_300] (rows=80000000 width=8) + Select Operator [SEL_322] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_299] (rows=80000000 width=8) + Filter Operator [FIL_321] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_55] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] @@ -325,13 +349,13 @@ Stage-0 SHUFFLE [RS_64] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_271] (rows=110092 width=0) - Conds:RS_61._col2=RS_298._col0(Inner),Output:["_col1"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + Conds:RS_61._col2=RS_320._col0(Inner),Output:["_col1"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_297] (rows=453 width=4) + Select Operator [SEL_319] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_296] (rows=453 width=186) + Filter Operator [FIL_318] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) TableScan [TS_52] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] @@ -339,84 +363,47 @@ Stage-0 SHUFFLE [RS_61] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_270] (rows=11665117 width=7) - Conds:Union 19._col0=RS_292._col0(Inner),Output:["_col1","_col2"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] + Conds:Union 19._col0=RS_314._col0(Inner),Output:["_col1","_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] PartitionCols:_col0 - Select Operator [SEL_291] (rows=50 width=4) + Select Operator [SEL_313] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=50 width=12) + Filter Operator [FIL_312] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_49] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Union 19 [SIMPLE_EDGE] <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_364] + Reduce Output Operator [RS_359] PartitionCols:_col0 - Select Operator [SEL_363] (rows=285117831 width=11) + Select Operator [SEL_358] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_362] (rows=285117831 width=11) + Filter Operator [FIL_357] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_280] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_360] - Group By Operator [GBY_359] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_355] + Group By Operator [GBY_354] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=50 width=4) + Select Operator [SEL_315] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_291] - <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_367] + Please refer to the previous Select Operator [SEL_313] + <-Map 24 [CONTAINS] vectorized + Reduce Output Operator [RS_362] PartitionCols:_col0 - Select Operator [SEL_366] (rows=143930993 width=11) + Select Operator [SEL_361] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_365] (rows=143930993 width=11) + Filter Operator [FIL_360] (rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_285] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Please refer to the previous Group By Operator [GBY_359] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_311] - Select Operator [SEL_310] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=525327388 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_96_customer_c_customer_sk_min) AND DynamicValue(RS_96_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_96_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=55046 width=4) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_302] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_330] - Select Operator [SEL_329] (rows=1 width=8) - Filter Operator [FIL_328] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_327] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_326] - Group By Operator [GBY_325] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_323] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_322] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_356] + Please refer to the previous Group By Operator [GBY_354] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index d97f9df397..f0080995fb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_146] - Limit [LIM_145] (rows=1 width=200) + File Output Operator [FS_149] + Limit [LIM_148] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_144] (rows=1 width=200) + Select Operator [SEL_147] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=200) + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] + Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_144] PartitionCols:_col2 - Select Operator [SEL_132] (rows=1704 width=276) + Select Operator [SEL_143] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_131] (rows=1704 width=181) + Filter Operator [FIL_142] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_129] (rows=1 width=184) + Select Operator [SEL_140] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_128] (rows=1 width=192) + Filter Operator [FIL_139] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_127] (rows=3098 width=192) + Group By Operator [GBY_138] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 9 [SIMPLE_EDGE] + <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_167] + Reduce Output Operator [RS_170] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=3098 width=192) + Group By Operator [GBY_169] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_165] (rows=1126 width=192) + Group By Operator [GBY_168] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_167] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=1126 width=192) + Group By Operator [GBY_166] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_162] (rows=2253 width=97) + Select Operator [SEL_165] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=2253 width=97) + Filter Operator [FIL_164] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_160] (rows=6761 width=97) + Group By Operator [GBY_163] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] + Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_155] (rows=40000000 width=93) + Select Operator [SEL_158] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=40000000 width=93) + Filter Operator [FIL_157] (rows=40000000 width=93) predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_158] (rows=26666667 width=4) + Select Operator [SEL_161] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_157] (rows=26666667 width=89) + Filter Operator [FIL_160] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_153] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_156] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=3098 width=192) + Group By Operator [GBY_155] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_151] (rows=5071 width=192) + Group By Operator [GBY_154] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=70994 width=192) + Group By Operator [GBY_152] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_148] (rows=20000000 width=89) + Select Operator [SEL_151] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_147] (rows=20000000 width=89) + Filter Operator [FIL_150] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] + Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_127] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_137] (rows=525329897 width=114) + Select Operator [SEL_136] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_136] (rows=525329897 width=114) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_135] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_90] (rows=1 width=8) + Select Operator [SEL_130] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - PartitionCols:_col0 - Select Operator [SEL_140] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_139] (rows=130 width=12) - predicate:((d_qoy = 1) and (d_year = 2002)) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Please refer to the previous Select Operator [SEL_128] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index a706e9459e..a029634671 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with my_customers as ( @@ -133,28 +133,26 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE) -Map 16 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 22 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) +Map 15 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) +Map 21 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 17 <- Map 22 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 27 (SIMPLE_EDGE) -Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 26 (SIMPLE_EDGE) Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 27 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 27 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 26 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -164,25 +162,25 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_351] - Limit [LIM_350] (rows=1 width=16) + File Output Operator [FS_349] + Limit [LIM_348] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_349] (rows=1 width=16) + Select Operator [SEL_347] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=16) + SHUFFLE [RS_346] + Select Operator [SEL_345] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_346] (rows=1 width=12) + Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + SHUFFLE [RS_343] PartitionCols:_col0 - Group By Operator [GBY_344] (rows=1 width=12) + Group By Operator [GBY_342] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_343] (rows=1 width=116) + Select Operator [SEL_341] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_342] (rows=1 width=116) + Group By Operator [GBY_340] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_119] @@ -197,82 +195,82 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Merge Join Operator [MERGEJOIN_270] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - Group By Operator [GBY_332] (rows=25 width=4) + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_331] + Group By Operator [GBY_330] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] PartitionCols:_col0 - Group By Operator [GBY_318] (rows=25 width=4) + Group By Operator [GBY_316] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_315] (rows=50 width=12) + Select Operator [SEL_313] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_313] (rows=50 width=12) + Filter Operator [FIL_311] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_73] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - Select Operator [SEL_340] (rows=1 width=8) - Filter Operator [FIL_339] (rows=1 width=8) + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + Select Operator [SEL_338] (rows=1 width=8) + Filter Operator [FIL_337] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_338] (rows=1 width=8) + Group By Operator [GBY_336] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - Group By Operator [GBY_336] (rows=1 width=8) + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] + Group By Operator [GBY_334] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_335] (rows=25 width=4) - Group By Operator [GBY_334] (rows=25 width=4) + Select Operator [SEL_333] (rows=25 width=4) + Group By Operator [GBY_332] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Group By Operator [GBY_319] (rows=25 width=4) + Group By Operator [GBY_317] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_316] (rows=50 width=12) + Select Operator [SEL_314] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_313] + Please refer to the previous Filter Operator [FIL_311] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_113] Select Operator [SEL_108] (rows=224732600 width=119) Output:["_col0","_col4","_col11","_col13"] Merge Join Operator [MERGEJOIN_272] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - Group By Operator [GBY_323] (rows=25 width=4) + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_323] + Group By Operator [GBY_321] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) + Group By Operator [GBY_315] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) + Select Operator [SEL_312] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_313] + Please refer to the previous Filter Operator [FIL_311] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_105] Merge Join Operator [MERGEJOIN_271] (rows=8989304 width=8) Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - Select Operator [SEL_330] (rows=1 width=8) - Filter Operator [FIL_329] (rows=1 width=8) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_329] + Select Operator [SEL_328] (rows=1 width=8) + Filter Operator [FIL_327] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_328] (rows=1 width=8) + Group By Operator [GBY_326] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_327] - Group By Operator [GBY_326] (rows=1 width=8) + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_325] + Group By Operator [GBY_324] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_324] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_323] + Select Operator [SEL_322] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_321] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_102] Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) @@ -281,145 +279,134 @@ Stage-0 SHUFFLE [RS_100] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_268] (rows=55046 width=4) - Conds:RS_69._col0=RS_304._col1(Inner),Output:["_col5"] + Conds:RS_69._col0=RS_310._col1(Inner),Output:["_col5"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_264] (rows=39720279 width=4) - Conds:RS_286._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col0"] + Conds:RS_292._col1, _col2=RS_295._col0, _col1(Inner),Output:["_col0"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_292] PartitionCols:_col1, _col2 - Select Operator [SEL_285] (rows=40000000 width=188) + Select Operator [SEL_291] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_284] (rows=40000000 width=188) + Filter Operator [FIL_290] (rows=40000000 width=188) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) TableScan [TS_29] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0, _col1 - Select Operator [SEL_288] (rows=1704 width=184) + Select Operator [SEL_294] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_287] (rows=1704 width=184) + Filter Operator [FIL_293] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_32] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] PartitionCols:_col1 - Select Operator [SEL_303] (rows=55046 width=8) + Select Operator [SEL_309] (rows=55046 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_302] (rows=55046 width=8) + Group By Operator [GBY_308] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 20 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1 Group By Operator [GBY_62] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col6, _col5 Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) - Conds:RS_58._col1=RS_301._col0(Inner),Output:["_col5","_col6"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + Conds:RS_58._col1=RS_307._col0(Inner),Output:["_col5","_col6"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] PartitionCols:_col0 - Select Operator [SEL_300] (rows=80000000 width=8) + Select Operator [SEL_306] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_299] (rows=80000000 width=8) + Filter Operator [FIL_305] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_49] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 19 [SIMPLE_EDGE] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) - Conds:RS_55._col2=RS_298._col0(Inner),Output:["_col1"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + Conds:RS_55._col2=RS_304._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_297] (rows=453 width=4) + Select Operator [SEL_303] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_296] (rows=453 width=186) + Filter Operator [FIL_302] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) TableScan [TS_46] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) - Conds:Union 17._col0=RS_292._col0(Inner),Output:["_col1","_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] + Conds:Union 16._col0=RS_298._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_291] (rows=50 width=4) + Select Operator [SEL_297] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=50 width=12) + Filter Operator [FIL_296] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_43] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_357] + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] vectorized + Reduce Output Operator [RS_355] PartitionCols:_col0 - Select Operator [SEL_356] (rows=285117831 width=11) + Select Operator [SEL_354] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_355] (rows=285117831 width=11) + Filter Operator [FIL_353] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_274] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_353] - Group By Operator [GBY_352] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_351] + Group By Operator [GBY_350] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_301] + Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=50 width=4) + Select Operator [SEL_299] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_291] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_360] + Please refer to the previous Select Operator [SEL_297] + <-Map 21 [CONTAINS] vectorized + Reduce Output Operator [RS_358] PartitionCols:_col0 - Select Operator [SEL_359] (rows=143930993 width=11) + Select Operator [SEL_357] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_358] (rows=143930993 width=11) + Filter Operator [FIL_356] (rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) TableScan [TS_279] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_354] - Please refer to the previous Group By Operator [GBY_352] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_352] + Please refer to the previous Group By Operator [GBY_350] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_99] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) - Conds:RS_309._col0=RS_312._col0(Inner),Output:["_col1","_col2","_col4"] + Conds:RS_286._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_308] (rows=525327388 width=114) + Select Operator [SEL_285] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_307] (rows=525327388 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_100_customer_c_customer_sk_min) AND DynamicValue(RS_100_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_100_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + Filter Operator [FIL_284] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_23] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_181] (rows=55046 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_268] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_311] (rows=73049 width=8) + Select Operator [SEL_288] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_310] (rows=73049 width=8) + Filter Operator [FIL_287] (rows=73049 width=8) predicate:d_date_sk is not null TableScan [TS_26] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out index 9eb50396f9..da49d2edb3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_146] - Limit [LIM_145] (rows=1 width=200) + File Output Operator [FS_149] + Limit [LIM_148] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_144] (rows=1 width=200) + Select Operator [SEL_147] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=200) + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] + Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_144] PartitionCols:_col2 - Select Operator [SEL_132] (rows=1704 width=276) + Select Operator [SEL_143] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_131] (rows=1704 width=181) + Filter Operator [FIL_142] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_129] (rows=1 width=184) + Select Operator [SEL_140] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_128] (rows=1 width=192) + Filter Operator [FIL_139] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_127] (rows=3098 width=192) + Group By Operator [GBY_138] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 9 [SIMPLE_EDGE] + <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_167] + Reduce Output Operator [RS_170] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=3098 width=192) + Group By Operator [GBY_169] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_165] (rows=1126 width=192) + Group By Operator [GBY_168] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_167] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=1126 width=192) + Group By Operator [GBY_166] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_162] (rows=2253 width=97) + Select Operator [SEL_165] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=2253 width=97) + Filter Operator [FIL_164] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_160] (rows=6761 width=97) + Group By Operator [GBY_163] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] + Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_155] (rows=40000000 width=93) + Select Operator [SEL_158] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=40000000 width=93) + Filter Operator [FIL_157] (rows=40000000 width=93) predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_158] (rows=26666667 width=4) + Select Operator [SEL_161] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_157] (rows=26666667 width=89) + Filter Operator [FIL_160] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_153] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_156] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=3098 width=192) + Group By Operator [GBY_155] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_151] (rows=5071 width=192) + Group By Operator [GBY_154] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=70994 width=192) + Group By Operator [GBY_152] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_148] (rows=20000000 width=89) + Select Operator [SEL_151] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_147] (rows=20000000 width=89) + Filter Operator [FIL_150] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] + Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_127] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_137] (rows=525329897 width=114) + Select Operator [SEL_136] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_136] (rows=525329897 width=114) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_135] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_90] (rows=1 width=8) + Select Operator [SEL_130] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - PartitionCols:_col0 - Select Operator [SEL_140] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_139] (rows=130 width=12) - predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Please refer to the previous Select Operator [SEL_128]