diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java index 81684be9c4..61892c9704 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java @@ -148,7 +148,7 @@ public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, Ma context.mapJoinToUnprocessedSmallTableReduceSinks.get(mapJoinOp).remove(parentRS); } - List mapJoinWork = null; + Set mapJoinWork = null; /* * if there was a pre-existing work generated for the big-table mapjoin side, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java index f977fc1b59..f9f52ecc0e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java @@ -107,7 +107,7 @@ public final Map> linkWorkWithReduceSinkMap; // map that says which mapjoin belongs to which work item - public final Map> mapJoinWorkMap; + public final Map> mapJoinWorkMap; // Mapping of reducesink to mapjoin operators // Only used for dynamic partitioned hash joins (mapjoin operator in the reducer) @@ -179,7 +179,7 @@ public GenTezProcContext(HiveConf conf, ParseContext parseContext, this.linkOpWithWorkMap = new LinkedHashMap, Map>(); this.linkWorkWithReduceSinkMap = new LinkedHashMap>(); this.smallTableParentToMapJoinMap = new LinkedHashMap, MapJoinOperator>(); - this.mapJoinWorkMap = new LinkedHashMap>(); + this.mapJoinWorkMap = new LinkedHashMap>(); this.rootToWorkMap = new LinkedHashMap, BaseWork>(); this.childToWorkMap = new LinkedHashMap, List>(); this.mapJoinParentMap = new LinkedHashMap>>(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 002dafaa0c..80c0747285 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hive.ql.parse; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; @@ -107,21 +109,9 @@ public Object process(Node nd, Stack stack, // operator graph. There's typically two reasons for that: a) mux/demux // b) multi insert. Mux/Demux will hit the same leaf again, multi insert // will result into a vertex with multiple FS or RS operators. - if (context.childToWorkMap.containsKey(operator)) { - // if we've seen both root and child, we can bail. - - // clear out the mapjoin set. we don't need it anymore. - context.currentMapJoinOperators.clear(); - - // clear out the union set. we don't need it anymore. - context.currentUnionOperators.clear(); - - return null; - } else { // At this point we don't have to do anything special. Just // run through the regular paces w/o creating a new task. work = context.rootToWorkMap.get(root); - } } else { // create a new vertex if (context.preceedingWork == null) { @@ -243,7 +233,7 @@ public Object process(Node nd, Stack stack, // remember the mapping in case we scan another branch of the // mapjoin later if (!context.mapJoinWorkMap.containsKey(mj)) { - List workItems = new LinkedList(); + Set workItems = new HashSet(); workItems.add(work); context.mapJoinWorkMap.put(mj, workItems); } else { @@ -344,15 +334,12 @@ public Object process(Node nd, Stack stack, // we've not seen this terminal before. we need to check // rootUnionWorkMap which contains the information of mapping the root // operator of a union work to a union work - unionWork = context.rootUnionWorkMap.get(root); - if (unionWork == null) { // if unionWork is null, it means it is the first time. we need to // create a union work object and add this work to it. Subsequent // work should reference the union and not the actual work. unionWork = GenTezUtils.createUnionWork(context, root, operator, tezWork); // finally connect the union work with work connectUnionWorkWithWork(unionWork, work, tezWork, context); - } } context.currentUnionOperators.clear(); work = unionWork; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 4b10e8974e..db80f25bd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -481,12 +481,6 @@ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, markSemiJoinForDPP(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - // Remove any semi join edges from Union Op - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemiJoinEdgesForUnion(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, - "Remove any semi join edge between Union and RS"); - // Remove any parallel edge between semijoin and mapjoin. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinsParallelToMapJoin(procCtx); @@ -630,7 +624,7 @@ protected void generateTaskTree(List> rootTasks, Pa ogw.startWalking(topNodes, null); // we need to specify the reserved memory for each work that contains Map Join - for (List baseWorkList : procCtx.mapJoinWorkMap.values()) { + for (Set baseWorkList : procCtx.mapJoinWorkMap.values()) { for (BaseWork w : baseWorkList) { // work should be the smallest unit for memory allocation w.setReservedMemoryMB( diff --git a/ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out b/ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out index bcea4ae1a7..89bd7889f3 100644 --- a/ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out +++ b/ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out @@ -268,12 +268,12 @@ Stage-1 HIVE COUNTERS: CREATED_FILES: 123 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_4: 500 + RECORDS_IN_Map_5: 500 RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2_n5: 189 RECORDS_OUT_INTERMEDIATE_Map_1: 57 - RECORDS_OUT_INTERMEDIATE_Map_4: 64 - RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 + RECORDS_OUT_INTERMEDIATE_Map_5: 64 + RECORDS_OUT_INTERMEDIATE_Reducer_4: 0 RECORDS_OUT_OPERATOR_FIL_20: 84 RECORDS_OUT_OPERATOR_FIL_29: 105 RECORDS_OUT_OPERATOR_FS_16: 121 @@ -297,13 +297,13 @@ Stage-1 HIVE COUNTERS: TOTAL_TABLE_ROWS_WRITTEN: 189 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 - GROUPED_INPUT_SPLITS_Map_4: 1 + GROUPED_INPUT_SPLITS_Map_5: 1 INPUT_DIRECTORIES_Map_1: 1 - INPUT_DIRECTORIES_Map_4: 1 + INPUT_DIRECTORIES_Map_5: 1 INPUT_FILES_Map_1: 1 - INPUT_FILES_Map_4: 1 + INPUT_FILES_Map_5: 1 RAW_INPUT_SPLITS_Map_1: 1 - RAW_INPUT_SPLITS_Map_4: 1 + RAW_INPUT_SPLITS_Map_5: 1 PREHOOK: query: insert into table src2_n5 partition (value) select temps.* from ( select * from src where key < 100 @@ -318,12 +318,12 @@ Stage-1 HIVE COUNTERS: CREATED_FILES: 186 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_4: 500 + RECORDS_IN_Map_5: 500 RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2_n5: 292 RECORDS_OUT_INTERMEDIATE_Map_1: 57 - RECORDS_OUT_INTERMEDIATE_Map_4: 127 - RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 + RECORDS_OUT_INTERMEDIATE_Map_5: 127 + RECORDS_OUT_INTERMEDIATE_Reducer_4: 0 RECORDS_OUT_OPERATOR_FIL_20: 84 RECORDS_OUT_OPERATOR_FIL_29: 208 RECORDS_OUT_OPERATOR_FS_16: 184 @@ -347,10 +347,10 @@ Stage-1 HIVE COUNTERS: TOTAL_TABLE_ROWS_WRITTEN: 292 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 - GROUPED_INPUT_SPLITS_Map_4: 1 + GROUPED_INPUT_SPLITS_Map_5: 1 INPUT_DIRECTORIES_Map_1: 1 - INPUT_DIRECTORIES_Map_4: 1 + INPUT_DIRECTORIES_Map_5: 1 INPUT_FILES_Map_1: 1 - INPUT_FILES_Map_4: 1 + INPUT_FILES_Map_5: 1 RAW_INPUT_SPLITS_Map_1: 1 - RAW_INPUT_SPLITS_Map_4: 1 + RAW_INPUT_SPLITS_Map_5: 1 diff --git a/ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out b/ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out index 93b4a5493d..ae11f2efd7 100644 --- a/ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out +++ b/ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out @@ -268,12 +268,12 @@ Stage-1 HIVE COUNTERS: CREATED_FILES: 123 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_4: 500 + RECORDS_IN_Map_5: 500 RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2_n3: 189 RECORDS_OUT_INTERMEDIATE_Map_1: 57 - RECORDS_OUT_INTERMEDIATE_Map_4: 64 - RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 + RECORDS_OUT_INTERMEDIATE_Map_5: 64 + RECORDS_OUT_INTERMEDIATE_Reducer_4: 0 RECORDS_OUT_OPERATOR_FIL_20: 84 RECORDS_OUT_OPERATOR_FIL_29: 105 RECORDS_OUT_OPERATOR_FS_16: 121 @@ -297,13 +297,13 @@ Stage-1 HIVE COUNTERS: TOTAL_TABLE_ROWS_WRITTEN: 189 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 - GROUPED_INPUT_SPLITS_Map_4: 1 + GROUPED_INPUT_SPLITS_Map_5: 1 INPUT_DIRECTORIES_Map_1: 1 - INPUT_DIRECTORIES_Map_4: 1 + INPUT_DIRECTORIES_Map_5: 1 INPUT_FILES_Map_1: 1 - INPUT_FILES_Map_4: 1 + INPUT_FILES_Map_5: 1 RAW_INPUT_SPLITS_Map_1: 1 - RAW_INPUT_SPLITS_Map_4: 1 + RAW_INPUT_SPLITS_Map_5: 1 PREHOOK: query: insert into table src2_n3 partition (value) select temps.* from ( select * from src where key < 100 @@ -318,12 +318,12 @@ Stage-1 HIVE COUNTERS: CREATED_FILES: 186 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_4: 500 + RECORDS_IN_Map_5: 500 RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2_n3: 292 RECORDS_OUT_INTERMEDIATE_Map_1: 57 - RECORDS_OUT_INTERMEDIATE_Map_4: 127 - RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 + RECORDS_OUT_INTERMEDIATE_Map_5: 127 + RECORDS_OUT_INTERMEDIATE_Reducer_4: 0 RECORDS_OUT_OPERATOR_FIL_20: 84 RECORDS_OUT_OPERATOR_FIL_29: 208 RECORDS_OUT_OPERATOR_FS_16: 184 @@ -347,10 +347,10 @@ Stage-1 HIVE COUNTERS: TOTAL_TABLE_ROWS_WRITTEN: 292 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 - GROUPED_INPUT_SPLITS_Map_4: 1 + GROUPED_INPUT_SPLITS_Map_5: 1 INPUT_DIRECTORIES_Map_1: 1 - INPUT_DIRECTORIES_Map_4: 1 + INPUT_DIRECTORIES_Map_5: 1 INPUT_FILES_Map_1: 1 - INPUT_FILES_Map_4: 1 + INPUT_FILES_Map_5: 1 RAW_INPUT_SPLITS_Map_1: 1 - RAW_INPUT_SPLITS_Map_4: 1 + RAW_INPUT_SPLITS_Map_5: 1 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index ecf6323e8e..8bda4088db 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -595,9 +595,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 9 <- Union 2 (CONTAINS) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 9 (CONTAINS) + Map 11 <- Union 2 (CONTAINS), Union 9 (CONTAINS) + Map 12 <- Reducer 10 (BROADCAST_EDGE) + Reducer 10 <- Union 9 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 12 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) @@ -624,22 +626,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 10 - Map Operator Tree: - TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 9 + Map 11 Map Operator Tree: TableScan alias: nonacidorctbl @@ -658,8 +660,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map 12 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -849,6 +892,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 9 + Vertex: Union 9 Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 36bd120028..0ec7a7a3ef 100644 --- a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -1181,24 +1181,24 @@ POSTHOOK: Output: default@c_n3 Plan optimized by CBO. Vertex dependency in root stage -Map 13 <- Union 14 (CONTAINS) -Map 15 <- Union 14 (CONTAINS) -Map 17 <- Union 18 (CONTAINS) -Map 19 <- Union 18 (CONTAINS) -Map 20 <- Union 18 (CONTAINS) +Map 18 <- Union 19 (CONTAINS) +Map 20 <- Union 19 (CONTAINS) Map 22 <- Union 23 (CONTAINS) Map 24 <- Union 23 (CONTAINS) Map 25 <- Union 23 (CONTAINS) -Map 26 <- Union 23 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 27 <- Union 28 (CONTAINS) +Map 29 <- Union 28 (CONTAINS) +Map 30 <- Union 28 (CONTAINS) +Map 31 <- Union 28 (CONTAINS) +Reducer 12 <- Union 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 10 (CONTAINS), Union 11 (CONTAINS), Union 23 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 5 (CONTAINS), Union 7 (CONTAINS), Union 8 (CONTAINS) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 10 (CONTAINS), Union 11 (CONTAINS), Union 28 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 5 (CONTAINS), Union 7 (CONTAINS), Union 8 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 10 (CONTAINS), Union 11 (CONTAINS), Union 19 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 5 (CONTAINS), Union 7 (CONTAINS), Union 8 (CONTAINS) +Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Union 8 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -1208,24 +1208,100 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 5 llap - File Output Operator [FS_81] - Group By Operator [GBY_79] (rows=1/1 width=880) + Reducer 12 llap + File Output Operator [FS_97] + Group By Operator [GBY_95] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Union 4 [CUSTOM_SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap + <-Union 11 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] llap + File Output Operator [FS_221] + table:{"name:":"default.a_n14"} + Select Operator [SEL_219] (rows=2640/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) + Conds:RS_41._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + <-Reducer 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) + Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_123] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_124] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_24] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_256] + PartitionCols:_col0 + Select Operator [SEL_254] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_253] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_252] (rows=25/25 width=89) + Output:["value"] + <-Map 24 [CONTAINS] llap + Reduce Output Operator [RS_261] + PartitionCols:_col0 + Select Operator [SEL_259] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_258] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_257] (rows=500/500 width=91) + Output:["value"] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_266] + PartitionCols:_col0 + Select Operator [SEL_264] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_263] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_262] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_227] + Select Operator [SEL_222] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_223] + table:{"name:":"default.b_n10"} + Please refer to the previous Select Operator [SEL_219] + Reduce Output Operator [RS_228] + Select Operator [SEL_224] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + File Output Operator [FS_225] + table:{"name:":"default.c_n3"} + Please refer to the previous Select Operator [SEL_219] + Reduce Output Operator [RS_229] + Select Operator [SEL_226] (rows=2899/5421 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_219] + <-Reducer 17 [CONTAINS] llap File Output Operator [FS_233] table:{"name:":"default.a_n14"} Select Operator [SEL_231] (rows=193/820 width=175) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) - Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap + Conds:RS_69._col1=Union 28._col0(Inner),Output:["_col0","_col3"] + <-Reducer 16 [SIMPLE_EDGE] llap SHUFFLE [RS_69] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] llap + <-Map 15 [SIMPLE_EDGE] llap SHUFFLE [RS_66] PartitionCols:_col0 Select Operator [SEL_5] (rows=25/25 width=175) @@ -1234,7 +1310,7 @@ Stage-5 predicate:(key is not null and value is not null) TableScan [TS_3] (rows=25/25 width=175) default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 21 [SIMPLE_EDGE] llap + <-Map 26 [SIMPLE_EDGE] llap SHUFFLE [RS_67] PartitionCols:_col0 Select Operator [SEL_51] (rows=25/25 width=175) @@ -1243,8 +1319,8 @@ Stage-5 predicate:key is not null TableScan [TS_49] (rows=25/25 width=175) default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] llap + <-Union 28 [SIMPLE_EDGE] + <-Map 27 [CONTAINS] llap Reduce Output Operator [RS_271] PartitionCols:_col0 Select Operator [SEL_269] (rows=25/25 width=89) @@ -1253,7 +1329,7 @@ Stage-5 predicate:value is not null TableScan [TS_267] (rows=25/25 width=89) Output:["value"] - <-Map 24 [CONTAINS] llap + <-Map 29 [CONTAINS] llap Reduce Output Operator [RS_276] PartitionCols:_col0 Select Operator [SEL_274] (rows=500/500 width=91) @@ -1262,7 +1338,7 @@ Stage-5 predicate:value is not null TableScan [TS_272] (rows=500/500 width=91) Output:["value"] - <-Map 25 [CONTAINS] llap + <-Map 30 [CONTAINS] llap Reduce Output Operator [RS_281] PartitionCols:_col0 Select Operator [SEL_279] (rows=500/500 width=91) @@ -1271,7 +1347,7 @@ Stage-5 predicate:value is not null TableScan [TS_277] (rows=500/500 width=91) Output:["value"] - <-Map 26 [CONTAINS] llap + <-Map 31 [CONTAINS] llap Reduce Output Operator [RS_286] PartitionCols:_col0 Select Operator [SEL_284] (rows=500/500 width=91) @@ -1304,7 +1380,7 @@ Stage-5 Select Operator [SEL_207] (rows=66/170 width=177) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) - Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] + Conds:RS_17._col3=Union 19._col0(Inner),Output:["_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col3 @@ -1317,14 +1393,13 @@ Stage-5 Output:["_col0","_col1"] Filter Operator [FIL_119] (rows=500/500 width=178) predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 10 [SIMPLE_EDGE] llap + Please refer to the previous TableScan [TS_0] + <-Map 15 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_5] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] llap + <-Union 19 [SIMPLE_EDGE] + <-Map 18 [CONTAINS] llap Reduce Output Operator [RS_246] PartitionCols:_col0 Select Operator [SEL_244] (rows=25/25 width=89) @@ -1333,7 +1408,7 @@ Stage-5 predicate:value is not null TableScan [TS_242] (rows=25/25 width=89) Output:["value"] - <-Map 15 [CONTAINS] llap + <-Map 20 [CONTAINS] llap Reduce Output Operator [RS_251] PartitionCols:_col0 Select Operator [SEL_249] (rows=500/500 width=91) @@ -1360,91 +1435,124 @@ Stage-5 Select Operator [SEL_214] (rows=2899/170 width=178) Output:["key","value"] Please refer to the previous Select Operator [SEL_207] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_221] - table:{"name:":"default.a_n14"} - Select Operator [SEL_219] (rows=2640/5421 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) - Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) - Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] - <-Map 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_24] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] llap - Reduce Output Operator [RS_256] - PartitionCols:_col0 - Select Operator [SEL_254] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_253] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_252] (rows=25/25 width=89) - Output:["value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_261] - PartitionCols:_col0 - Select Operator [SEL_259] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_258] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_257] (rows=500/500 width=91) - Output:["value"] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_266] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_263] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_262] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_227] - Select Operator [SEL_222] (rows=2899/5421 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_223] - table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_228] - Select Operator [SEL_224] (rows=2899/5421 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_225] - table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_229] - Select Operator [SEL_226] (rows=2899/5421 width=178) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] Reducer 6 llap + File Output Operator [FS_81] + Group By Operator [GBY_79] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Union 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_221] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_223] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_225] + Please refer to the previous Reduce Output Operator [RS_229] + <-Reducer 17 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_239] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_240] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_241] + <-Reducer 3 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_209] + Please refer to the previous Reduce Output Operator [RS_215] + Please refer to the previous File Output Operator [FS_211] + Please refer to the previous Reduce Output Operator [RS_216] + Please refer to the previous File Output Operator [FS_213] + Please refer to the previous Reduce Output Operator [RS_217] + Reducer 9 llap File Output Operator [FS_89] Group By Operator [GBY_87] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] - Reducer 7 llap - File Output Operator [FS_97] - Group By Operator [GBY_95] (rows=1/1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + <-Union 8 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_221] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_223] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_225] + Please refer to the previous Reduce Output Operator [RS_229] + <-Reducer 17 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_239] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_240] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_241] + <-Reducer 3 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_209] + Please refer to the previous Reduce Output Operator [RS_215] + Please refer to the previous File Output Operator [FS_211] + Please refer to the previous Reduce Output Operator [RS_216] + Please refer to the previous File Output Operator [FS_213] + Please refer to the previous Reduce Output Operator [RS_217] + Union 10 + <-Reducer 14 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_221] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_223] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_225] + Please refer to the previous Reduce Output Operator [RS_229] + <-Reducer 17 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_239] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_240] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_241] + <-Reducer 3 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_209] + Please refer to the previous Reduce Output Operator [RS_215] + Please refer to the previous File Output Operator [FS_211] + Please refer to the previous Reduce Output Operator [RS_216] + Please refer to the previous File Output Operator [FS_213] + Please refer to the previous Reduce Output Operator [RS_217] + Union 4 + <-Reducer 14 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_221] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_223] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_225] + Please refer to the previous Reduce Output Operator [RS_229] + <-Reducer 17 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_239] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_240] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_241] + <-Reducer 3 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_209] + Please refer to the previous Reduce Output Operator [RS_215] + Please refer to the previous File Output Operator [FS_211] + Please refer to the previous Reduce Output Operator [RS_216] + Please refer to the previous File Output Operator [FS_213] + Please refer to the previous Reduce Output Operator [RS_217] + Union 7 + <-Reducer 14 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_221] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_223] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_225] + Please refer to the previous Reduce Output Operator [RS_229] + <-Reducer 17 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_239] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_240] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_241] + <-Reducer 3 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_209] + Please refer to the previous Reduce Output Operator [RS_215] + Please refer to the previous File Output Operator [FS_211] + Please refer to the previous Reduce Output Operator [RS_216] + Please refer to the previous File Output Operator [FS_213] + Please refer to the previous Reduce Output Operator [RS_217] Stage-6 Stats Work{} Stage-1 @@ -2107,13 +2215,13 @@ POSTHOOK: Output: default@dest2_n29 Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Union 3 (CONTAINS) -Map 9 <- Union 3 (CONTAINS) -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) +Map 10 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -2135,17 +2243,7 @@ Stage-4 Group By Operator [GBY_13] (rows=316/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_43] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_40] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_44] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_41] - <-Map 9 [CONTAINS] llap + <-Map 10 [CONTAINS] llap Reduce Output Operator [RS_48] PartitionCols:_col0 Select Operator [SEL_46] (rows=500/500 width=178) @@ -2155,6 +2253,16 @@ Stage-4 Reduce Output Operator [RS_49] PartitionCols:_col0, _col1 Please refer to the previous Select Operator [SEL_46] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_43] + PartitionCols:_col0 + Select Operator [SEL_41] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_40] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_44] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_41] <-Reducer 2 [CONTAINS] llap Reduce Output Operator [RS_38] PartitionCols:_col0 @@ -2174,18 +2282,27 @@ Stage-4 Select Operator [SEL_18] (rows=316/310 width=272) Output:["key","value"] Please refer to the previous Select Operator [SEL_14] - Reducer 7 llap + Reducer 8 llap File Output Operator [FS_34] Group By Operator [GBY_32] (rows=1/1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap File Output Operator [FS_28] table:{"name:":"default.dest2_n29"} Select Operator [SEL_26] (rows=1001/310 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_25] (rows=1001/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + <-Union 6 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_48] + Please refer to the previous Reduce Output Operator [RS_49] + <-Map 9 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_43] + Please refer to the previous Reduce Output Operator [RS_44] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_38] + Please refer to the previous Reduce Output Operator [RS_39] PARTITION_ONLY_SHUFFLE [RS_31] Select Operator [SEL_30] (rows=1001/310 width=456) Output:["key","val1","val2"] @@ -2250,12 +2367,12 @@ POSTHOOK: Output: default@dest2_n29 Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Union 3 (CONTAINS) -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -2277,7 +2394,7 @@ Stage-4 Group By Operator [GBY_11] (rows=316/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap + <-Map 9 [CONTAINS] llap Reduce Output Operator [RS_41] PartitionCols:_col0 Select Operator [SEL_39] (rows=500/500 width=178) @@ -2306,18 +2423,24 @@ Stage-4 Select Operator [SEL_16] (rows=316/310 width=272) Output:["key","value"] Please refer to the previous Select Operator [SEL_12] - Reducer 7 llap + Reducer 8 llap File Output Operator [FS_32] Group By Operator [GBY_30] (rows=1/1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap File Output Operator [FS_26] table:{"name:":"default.dest2_n29"} Select Operator [SEL_24] (rows=501/310 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_23] (rows=501/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + <-Union 6 [SIMPLE_EDGE] + <-Map 9 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_41] + Please refer to the previous Reduce Output Operator [RS_42] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_36] + Please refer to the previous Reduce Output Operator [RS_37] PARTITION_ONLY_SHUFFLE [RS_29] Select Operator [SEL_28] (rows=501/310 width=456) Output:["key","val1","val2"] diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 9f5a4b96e8..00ec076d4a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -2716,23 +2716,23 @@ POSTHOOK: Output: default@c_n4 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 9 (BROADCAST_EDGE) -Map 10 <- Union 11 (CONTAINS) -Map 12 <- Union 11 (CONTAINS) -Map 14 <- Union 15 (CONTAINS) -Map 16 <- Union 15 (CONTAINS) -Map 17 <- Union 15 (CONTAINS) -Map 19 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 20 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 21 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 22 <- Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 9 <- Map 18 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 11 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 1 <- Map 14 (BROADCAST_EDGE) +Map 14 <- Map 23 (BROADCAST_EDGE) +Map 15 <- Union 16 (CONTAINS) +Map 17 <- Union 16 (CONTAINS) +Map 19 <- Union 20 (CONTAINS) +Map 21 <- Union 20 (CONTAINS) +Map 22 <- Union 20 (CONTAINS) +Map 24 <- Map 14 (BROADCAST_EDGE), Union 10 (CONTAINS), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Map 25 <- Map 14 (BROADCAST_EDGE), Union 10 (CONTAINS), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Map 26 <- Map 14 (BROADCAST_EDGE), Union 10 (CONTAINS), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Map 27 <- Map 14 (BROADCAST_EDGE), Union 10 (CONTAINS), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Reducer 11 <- Union 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 10 (CONTAINS), Union 20 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 10 (CONTAINS), Union 16 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS), Union 6 (CONTAINS), Union 7 (CONTAINS), Union 9 (CONTAINS) +Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Union 7 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -2742,24 +2742,24 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 4 llap - File Output Operator [FS_82] - Group By Operator [GBY_80] (rows=1 width=880) + Reducer 11 llap + File Output Operator [FS_100] + Group By Operator [GBY_98] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 3 [CUSTOM_SIMPLE_EDGE] - <-Map 19 [CONTAINS] llap + <-Union 10 [CUSTOM_SIMPLE_EDGE] + <-Map 24 [CONTAINS] llap File Output Operator [FS_278] table:{"name:":"default.a_n19"} Select Operator [SEL_275] (rows=1677 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_273] (rows=1677 width=10) Conds:RS_357._col1=SEL_274._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap + <-Map 14 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_357] PartitionCols:_col1 Map Join Operator [MAPJOIN_356] (rows=27 width=7) Conds:SEL_354._col0=RS_352._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 18 [BROADCAST_EDGE] vectorized, llap + <-Map 23 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_352] PartitionCols:_col0 Select Operator [SEL_351] (rows=25 width=7) @@ -2804,14 +2804,14 @@ Stage-5 Select Operator [SEL_283] (rows=3409 width=10) Output:["key","value"] Please refer to the previous Select Operator [SEL_275] - <-Map 20 [CONTAINS] llap + <-Map 25 [CONTAINS] llap File Output Operator [FS_298] table:{"name:":"default.a_n19"} Select Operator [SEL_295] (rows=1677 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_293] (rows=1677 width=10) Conds:RS_358._col1=SEL_294._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap + <-Map 14 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_358] PartitionCols:_col1 Please refer to the previous Map Join Operator [MAPJOIN_356] @@ -2845,14 +2845,14 @@ Stage-5 Select Operator [SEL_303] (rows=3409 width=10) Output:["key","value"] Please refer to the previous Select Operator [SEL_295] - <-Map 21 [CONTAINS] llap + <-Map 26 [CONTAINS] llap File Output Operator [FS_318] table:{"name:":"default.a_n19"} Select Operator [SEL_315] (rows=1677 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_313] (rows=1677 width=10) Conds:RS_359._col1=SEL_314._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap + <-Map 14 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_359] PartitionCols:_col1 Please refer to the previous Map Join Operator [MAPJOIN_356] @@ -2886,14 +2886,14 @@ Stage-5 Select Operator [SEL_323] (rows=3409 width=10) Output:["key","value"] Please refer to the previous Select Operator [SEL_315] - <-Map 22 [CONTAINS] llap + <-Map 27 [CONTAINS] llap File Output Operator [FS_338] table:{"name:":"default.a_n19"} Select Operator [SEL_335] (rows=1677 width=10) Output:["_col0","_col1"] Map Join Operator [MAPJOIN_333] (rows=1677 width=10) Conds:RS_360._col1=SEL_334._col0(Inner),Output:["_col0","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap + <-Map 14 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_360] PartitionCols:_col1 Please refer to the previous Map Join Operator [MAPJOIN_356] @@ -2927,79 +2927,14 @@ Stage-5 Select Operator [SEL_343] (rows=3409 width=10) Output:["key","value"] Please refer to the previous Select Operator [SEL_335] - <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_218] - table:{"name:":"default.a_n19"} - Select Operator [SEL_216] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_215] (rows=605 width=10) - Conds:RS_367._col3=Union 11._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_367] - PartitionCols:_col3 - Map Join Operator [MAPJOIN_365] (rows=550 width=10) - Conds:SEL_363._col0=RS_355._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_355] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_354] - <-Select Operator [SEL_363] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=10) - predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_373] - PartitionCols:_col0 - Select Operator [SEL_372] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_371] (rows=25 width=7) - predicate:value is not null - TableScan [TS_245] (rows=25 width=7) - Output:["value"] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_376] - PartitionCols:_col0 - Select Operator [SEL_375] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_374] (rows=500 width=10) - predicate:value is not null - TableScan [TS_250] (rows=500 width=10) - Output:["value"] - Reduce Output Operator [RS_227] - Group By Operator [GBY_224] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_219] (rows=3409 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] - File Output Operator [FS_220] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_216] - Reduce Output Operator [RS_228] - Group By Operator [GBY_225] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_221] (rows=3409 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] - File Output Operator [FS_222] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_216] - Reduce Output Operator [RS_229] - Group By Operator [GBY_226] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_223] (rows=3409 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_216] - <-Reducer 8 [CONTAINS] llap + <-Reducer 13 [CONTAINS] llap File Output Operator [FS_233] table:{"name:":"default.a_n19"} Select Operator [SEL_231] (rows=1127 width=10) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_230] (rows=1127 width=10) - Conds:RS_41._col1=Union 15._col0(Inner),Output:["_col0","_col3"] - <-Reducer 7 [SIMPLE_EDGE] llap + Conds:RS_41._col1=Union 20._col0(Inner),Output:["_col0","_col3"] + <-Reducer 12 [SIMPLE_EDGE] llap SHUFFLE [RS_41] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_204] (rows=550 width=10) @@ -3011,8 +2946,9 @@ Stage-5 Output:["_col0","_col1"] Filter Operator [FIL_362] (rows=500 width=10) predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] - <-Map 13 [SIMPLE_EDGE] vectorized, llap + TableScan [TS_0] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 18 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_370] PartitionCols:_col0 Select Operator [SEL_369] (rows=500 width=10) @@ -3021,8 +2957,8 @@ Stage-5 predicate:key is not null TableScan [TS_24] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] vectorized, llap + <-Union 20 [SIMPLE_EDGE] + <-Map 19 [CONTAINS] vectorized, llap Reduce Output Operator [RS_379] PartitionCols:_col0 Select Operator [SEL_378] (rows=25 width=7) @@ -3031,7 +2967,7 @@ Stage-5 predicate:value is not null TableScan [TS_255] (rows=25 width=7) Output:["value"] - <-Map 16 [CONTAINS] vectorized, llap + <-Map 21 [CONTAINS] vectorized, llap Reduce Output Operator [RS_382] PartitionCols:_col0 Select Operator [SEL_381] (rows=500 width=10) @@ -3040,7 +2976,7 @@ Stage-5 predicate:value is not null TableScan [TS_260] (rows=500 width=10) Output:["value"] - <-Map 17 [CONTAINS] vectorized, llap + <-Map 22 [CONTAINS] vectorized, llap Reduce Output Operator [RS_385] PartitionCols:_col0 Select Operator [SEL_384] (rows=500 width=10) @@ -3073,16 +3009,293 @@ Stage-5 Select Operator [SEL_238] (rows=3409 width=10) Output:["key","value"] Please refer to the previous Select Operator [SEL_231] + <-Reducer 2 [CONTAINS] llap + File Output Operator [FS_218] + table:{"name:":"default.a_n19"} + Select Operator [SEL_216] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_215] (rows=605 width=10) + Conds:RS_367._col3=Union 16._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_367] + PartitionCols:_col3 + Map Join Operator [MAPJOIN_365] (rows=550 width=10) + Conds:SEL_363._col0=RS_355._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 14 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_355] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_354] + <-Select Operator [SEL_363] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_361] (rows=500 width=10) + predicate:key is not null + Please refer to the previous TableScan [TS_0] + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_373] + PartitionCols:_col0 + Select Operator [SEL_372] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_371] (rows=25 width=7) + predicate:value is not null + TableScan [TS_245] (rows=25 width=7) + Output:["value"] + <-Map 17 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_376] + PartitionCols:_col0 + Select Operator [SEL_375] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_374] (rows=500 width=10) + predicate:value is not null + TableScan [TS_250] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_227] + Group By Operator [GBY_224] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_219] (rows=3409 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_216] + File Output Operator [FS_220] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_216] + Reduce Output Operator [RS_228] + Group By Operator [GBY_225] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_221] (rows=3409 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_216] + File Output Operator [FS_222] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_216] + Reduce Output Operator [RS_229] + Group By Operator [GBY_226] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_223] (rows=3409 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_216] Reducer 5 llap + File Output Operator [FS_82] + Group By Operator [GBY_80] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Union 4 [CUSTOM_SIMPLE_EDGE] + <-Map 24 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_278] + Please refer to the previous Reduce Output Operator [RS_287] + Please refer to the previous File Output Operator [FS_280] + Please refer to the previous Reduce Output Operator [RS_288] + Please refer to the previous File Output Operator [FS_282] + Please refer to the previous Reduce Output Operator [RS_289] + <-Map 25 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_298] + Please refer to the previous Reduce Output Operator [RS_307] + Please refer to the previous File Output Operator [FS_300] + Please refer to the previous Reduce Output Operator [RS_308] + Please refer to the previous File Output Operator [FS_302] + Please refer to the previous Reduce Output Operator [RS_309] + <-Map 26 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_318] + Please refer to the previous Reduce Output Operator [RS_327] + Please refer to the previous File Output Operator [FS_320] + Please refer to the previous Reduce Output Operator [RS_328] + Please refer to the previous File Output Operator [FS_322] + Please refer to the previous Reduce Output Operator [RS_329] + <-Map 27 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_338] + Please refer to the previous Reduce Output Operator [RS_347] + Please refer to the previous File Output Operator [FS_340] + Please refer to the previous Reduce Output Operator [RS_348] + Please refer to the previous File Output Operator [FS_342] + Please refer to the previous Reduce Output Operator [RS_349] + <-Reducer 13 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_242] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_243] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_244] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_218] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_220] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_222] + Please refer to the previous Reduce Output Operator [RS_229] + Reducer 8 llap File Output Operator [FS_91] Group By Operator [GBY_89] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] - Reducer 6 llap - File Output Operator [FS_100] - Group By Operator [GBY_98] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + <-Union 7 [CUSTOM_SIMPLE_EDGE] + <-Map 24 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_278] + Please refer to the previous Reduce Output Operator [RS_287] + Please refer to the previous File Output Operator [FS_280] + Please refer to the previous Reduce Output Operator [RS_288] + Please refer to the previous File Output Operator [FS_282] + Please refer to the previous Reduce Output Operator [RS_289] + <-Map 25 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_298] + Please refer to the previous Reduce Output Operator [RS_307] + Please refer to the previous File Output Operator [FS_300] + Please refer to the previous Reduce Output Operator [RS_308] + Please refer to the previous File Output Operator [FS_302] + Please refer to the previous Reduce Output Operator [RS_309] + <-Map 26 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_318] + Please refer to the previous Reduce Output Operator [RS_327] + Please refer to the previous File Output Operator [FS_320] + Please refer to the previous Reduce Output Operator [RS_328] + Please refer to the previous File Output Operator [FS_322] + Please refer to the previous Reduce Output Operator [RS_329] + <-Map 27 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_338] + Please refer to the previous Reduce Output Operator [RS_347] + Please refer to the previous File Output Operator [FS_340] + Please refer to the previous Reduce Output Operator [RS_348] + Please refer to the previous File Output Operator [FS_342] + Please refer to the previous Reduce Output Operator [RS_349] + <-Reducer 13 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_242] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_243] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_244] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_218] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_220] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_222] + Please refer to the previous Reduce Output Operator [RS_229] + Union 3 + <-Map 24 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_278] + Please refer to the previous Reduce Output Operator [RS_287] + Please refer to the previous File Output Operator [FS_280] + Please refer to the previous Reduce Output Operator [RS_288] + Please refer to the previous File Output Operator [FS_282] + Please refer to the previous Reduce Output Operator [RS_289] + <-Map 25 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_298] + Please refer to the previous Reduce Output Operator [RS_307] + Please refer to the previous File Output Operator [FS_300] + Please refer to the previous Reduce Output Operator [RS_308] + Please refer to the previous File Output Operator [FS_302] + Please refer to the previous Reduce Output Operator [RS_309] + <-Map 26 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_318] + Please refer to the previous Reduce Output Operator [RS_327] + Please refer to the previous File Output Operator [FS_320] + Please refer to the previous Reduce Output Operator [RS_328] + Please refer to the previous File Output Operator [FS_322] + Please refer to the previous Reduce Output Operator [RS_329] + <-Map 27 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_338] + Please refer to the previous Reduce Output Operator [RS_347] + Please refer to the previous File Output Operator [FS_340] + Please refer to the previous Reduce Output Operator [RS_348] + Please refer to the previous File Output Operator [FS_342] + Please refer to the previous Reduce Output Operator [RS_349] + <-Reducer 13 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_242] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_243] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_244] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_218] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_220] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_222] + Please refer to the previous Reduce Output Operator [RS_229] + Union 6 + <-Map 24 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_278] + Please refer to the previous Reduce Output Operator [RS_287] + Please refer to the previous File Output Operator [FS_280] + Please refer to the previous Reduce Output Operator [RS_288] + Please refer to the previous File Output Operator [FS_282] + Please refer to the previous Reduce Output Operator [RS_289] + <-Map 25 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_298] + Please refer to the previous Reduce Output Operator [RS_307] + Please refer to the previous File Output Operator [FS_300] + Please refer to the previous Reduce Output Operator [RS_308] + Please refer to the previous File Output Operator [FS_302] + Please refer to the previous Reduce Output Operator [RS_309] + <-Map 26 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_318] + Please refer to the previous Reduce Output Operator [RS_327] + Please refer to the previous File Output Operator [FS_320] + Please refer to the previous Reduce Output Operator [RS_328] + Please refer to the previous File Output Operator [FS_322] + Please refer to the previous Reduce Output Operator [RS_329] + <-Map 27 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_338] + Please refer to the previous Reduce Output Operator [RS_347] + Please refer to the previous File Output Operator [FS_340] + Please refer to the previous Reduce Output Operator [RS_348] + Please refer to the previous File Output Operator [FS_342] + Please refer to the previous Reduce Output Operator [RS_349] + <-Reducer 13 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_242] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_243] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_244] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_218] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_220] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_222] + Please refer to the previous Reduce Output Operator [RS_229] + Union 9 + <-Map 24 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_278] + Please refer to the previous Reduce Output Operator [RS_287] + Please refer to the previous File Output Operator [FS_280] + Please refer to the previous Reduce Output Operator [RS_288] + Please refer to the previous File Output Operator [FS_282] + Please refer to the previous Reduce Output Operator [RS_289] + <-Map 25 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_298] + Please refer to the previous Reduce Output Operator [RS_307] + Please refer to the previous File Output Operator [FS_300] + Please refer to the previous Reduce Output Operator [RS_308] + Please refer to the previous File Output Operator [FS_302] + Please refer to the previous Reduce Output Operator [RS_309] + <-Map 26 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_318] + Please refer to the previous Reduce Output Operator [RS_327] + Please refer to the previous File Output Operator [FS_320] + Please refer to the previous Reduce Output Operator [RS_328] + Please refer to the previous File Output Operator [FS_322] + Please refer to the previous Reduce Output Operator [RS_329] + <-Map 27 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_338] + Please refer to the previous Reduce Output Operator [RS_347] + Please refer to the previous File Output Operator [FS_340] + Please refer to the previous Reduce Output Operator [RS_348] + Please refer to the previous File Output Operator [FS_342] + Please refer to the previous Reduce Output Operator [RS_349] + <-Reducer 13 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_233] + Please refer to the previous Reduce Output Operator [RS_242] + Please refer to the previous File Output Operator [FS_235] + Please refer to the previous Reduce Output Operator [RS_243] + Please refer to the previous File Output Operator [FS_237] + Please refer to the previous Reduce Output Operator [RS_244] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous File Output Operator [FS_218] + Please refer to the previous Reduce Output Operator [RS_227] + Please refer to the previous File Output Operator [FS_220] + Please refer to the previous Reduce Output Operator [RS_228] + Please refer to the previous File Output Operator [FS_222] + Please refer to the previous Reduce Output Operator [RS_229] Stage-6 Stats Work{} Stage-1 @@ -3725,13 +3938,13 @@ POSTHOOK: Output: default@dest2_n43 Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Union 3 (CONTAINS) -Map 9 <- Union 3 (CONTAINS) -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) +Map 10 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -3753,21 +3966,7 @@ Stage-4 Group By Operator [GBY_15] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap - Reduce Output Operator [RS_52] - PartitionCols:_col0 - Group By Operator [GBY_50] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_48] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_47] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_53] - PartitionCols:_col0, _col1 - Group By Operator [GBY_51] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_48] - <-Map 9 [CONTAINS] llap + <-Map 10 [CONTAINS] llap Reduce Output Operator [RS_59] PartitionCols:_col0 Group By Operator [GBY_57] (rows=1 width=280) @@ -3781,6 +3980,20 @@ Stage-4 Group By Operator [GBY_58] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_55] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_52] + PartitionCols:_col0 + Group By Operator [GBY_50] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_48] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_47] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_53] + PartitionCols:_col0, _col1 + Group By Operator [GBY_51] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_48] <-Reducer 2 [CONTAINS] llap Reduce Output Operator [RS_45] PartitionCols:_col0 @@ -3808,18 +4021,27 @@ Stage-4 Select Operator [SEL_20] (rows=1 width=272) Output:["key","value"] Please refer to the previous Select Operator [SEL_16] - Reducer 7 llap + Reducer 8 llap File Output Operator [FS_39] Group By Operator [GBY_37] (rows=1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap File Output Operator [FS_32] table:{"name:":"default.dest2_n43"} Select Operator [SEL_30] (rows=1 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_29] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + <-Union 6 [SIMPLE_EDGE] + <-Map 10 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_59] + Please refer to the previous Reduce Output Operator [RS_60] + <-Map 9 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_52] + Please refer to the previous Reduce Output Operator [RS_53] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_45] + Please refer to the previous Reduce Output Operator [RS_46] PARTITION_ONLY_SHUFFLE [RS_36] Group By Operator [GBY_35] (rows=1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] @@ -3862,12 +4084,12 @@ POSTHOOK: Output: default@dest2_n43 Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Union 3 (CONTAINS) -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -3889,7 +4111,7 @@ Stage-4 Group By Operator [GBY_13] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] - <-Map 8 [CONTAINS] llap + <-Map 9 [CONTAINS] llap Reduce Output Operator [RS_50] PartitionCols:_col0 Group By Operator [GBY_48] (rows=1 width=280) @@ -3930,18 +4152,24 @@ Stage-4 Select Operator [SEL_18] (rows=1 width=272) Output:["key","value"] Please refer to the previous Select Operator [SEL_14] - Reducer 7 llap + Reducer 8 llap File Output Operator [FS_37] Group By Operator [GBY_35] (rows=1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap File Output Operator [FS_30] table:{"name:":"default.dest2_n43"} Select Operator [SEL_28] (rows=1 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_27] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + <-Union 6 [SIMPLE_EDGE] + <-Map 9 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_50] + Please refer to the previous Reduce Output Operator [RS_51] + <-Reducer 2 [CONTAINS] llap + Please refer to the previous Reduce Output Operator [RS_43] + Please refer to the previous Reduce Output Operator [RS_44] PARTITION_ONLY_SHUFFLE [RS_34] Group By Operator [GBY_33] (rows=1 width=1320) Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] diff --git a/ql/src/test/results/clientpositive/llap/multi_insert.q.out b/ql/src/test/results/clientpositive/llap/multi_insert.q.out index f1e04f8877..fbf804434b 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert.q.out @@ -1835,10 +1835,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Map 8 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1900,7 +1900,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 8 Map Operator Tree: TableScan alias: src @@ -1959,7 +1959,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1974,7 +1974,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1991,6 +1991,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 + Union 5 + Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection @@ -2132,10 +2138,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Map 8 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2197,7 +2203,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 8 Map Operator Tree: TableScan alias: src @@ -2256,7 +2262,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2271,7 +2277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2288,6 +2294,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 + Union 5 + Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection @@ -2429,10 +2441,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Map 8 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2494,7 +2506,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 8 Map Operator Tree: TableScan alias: src @@ -2553,7 +2565,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2568,7 +2580,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2585,6 +2597,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 + Union 5 + Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection @@ -2726,10 +2744,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Map 8 <- Union 2 (CONTAINS), Union 3 (CONTAINS), Union 5 (CONTAINS), Union 6 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2791,7 +2809,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 8 Map Operator Tree: TableScan alias: src @@ -2850,7 +2868,7 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2865,7 +2883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2882,6 +2900,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 + Union 5 + Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index b2202ad776..e52a56b096 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -58,9 +58,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS) + Map 5 <- Union 2 (CONTAINS), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -102,7 +102,7 @@ STAGE PLANS: value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: dummy_n2 @@ -141,7 +141,7 @@ STAGE PLANS: value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -163,6 +163,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out index 0d61e292e7..ada70c67fc 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out @@ -71,9 +71,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS), Union 3 (CONTAINS) + Map 5 <- Union 2 (CONTAINS), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -115,7 +115,7 @@ STAGE PLANS: value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: dummy_n7 @@ -154,7 +154,7 @@ STAGE PLANS: value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -176,6 +176,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 + Union 3 + Vertex: Union 3 Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 05d259b0d9..556a222158 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -59,13 +59,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Map 9 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Map 10 <- Union 3 (CONTAINS), Union 6 (CONTAINS) + Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -86,10 +86,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 10 Map Operator Tree: TableScan - alias: s2 + alias: s0 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -122,7 +122,7 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: s0 + alias: s2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -235,7 +235,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -269,7 +269,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -286,6 +286,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection @@ -1040,13 +1042,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 9 <- Union 2 (CONTAINS) + Map 1 <- Union 2 (CONTAINS), Union 5 (CONTAINS) + Map 10 <- Union 2 (CONTAINS), Union 5 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -1082,25 +1084,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 9 + Map 10 Map Operator Tree: TableScan alias: s2 @@ -1133,6 +1117,24 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1182,7 +1184,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1216,7 +1218,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1231,7 +1233,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1267,6 +1269,8 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 + Union 5 + Vertex: Union 5 Stage: Stage-3 Dependency Collection @@ -2017,13 +2021,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 9 <- Union 2 (CONTAINS) + Map 1 <- Union 2 (CONTAINS), Union 5 (CONTAINS) + Map 10 <- Union 2 (CONTAINS), Union 5 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -2059,25 +2063,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 9 + Map 10 Map Operator Tree: TableScan alias: s2 @@ -2110,6 +2096,24 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2159,7 +2163,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2193,7 +2197,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2208,7 +2212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2244,6 +2248,8 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 + Union 5 + Vertex: Union 5 Stage: Stage-3 Dependency Collection @@ -2986,12 +2992,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Map 9 <- Union 3 (CONTAINS), Union 6 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 6 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3012,7 +3018,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -3128,7 +3134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3162,7 +3168,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3179,6 +3185,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 + Union 6 + Vertex: Union 6 Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/union4.q.out b/ql/src/test/results/clientpositive/llap/union4.q.out index eb8c9009e5..f1ba39a027 100644 --- a/ql/src/test/results/clientpositive/llap/union4.q.out +++ b/ql/src/test/results/clientpositive/llap/union4.q.out @@ -33,9 +33,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +56,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -111,7 +111,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -126,7 +126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -165,6 +165,8 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/union6.q.out b/ql/src/test/results/clientpositive/llap/union6.q.out index b120098abc..94b7e30dc3 100644 --- a/ql/src/test/results/clientpositive/llap/union6.q.out +++ b/ql/src/test/results/clientpositive/llap/union6.q.out @@ -35,9 +35,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Map 6 <- Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,7 +58,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -123,7 +123,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -140,6 +140,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 93791ac8e0..c71bbeab4b 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -6984,7 +6984,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS), Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out index 5083678249..2309618187 100644 --- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -669,10 +669,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -697,7 +697,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -719,7 +719,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -776,7 +776,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -791,7 +791,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Select Operator @@ -826,7 +826,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Select Operator @@ -863,6 +863,8 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection @@ -953,10 +955,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -981,7 +983,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -1003,7 +1005,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -1060,7 +1062,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1075,7 +1077,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1110,7 +1112,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1147,6 +1149,8 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index 50fa078159..fd5f6d7521 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with my_customers as ( diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 8d10899c63..7a0750eda3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -133,8 +133,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 18 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) -Map 24 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 1 <- Reducer 24 (BROADCAST_EDGE) +Map 18 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) +Map 25 <- Reducer 27 (BROADCAST_EDGE), Union 19 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) @@ -143,15 +144,16 @@ Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 11 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) -Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 28 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) +Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) -Reducer 4 <- Map 29 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 4 <- Map 30 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 32 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) @@ -162,25 +164,25 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_353] - Limit [LIM_352] (rows=1 width=16) + File Output Operator [FS_358] + Limit [LIM_357] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_351] (rows=1 width=16) + Select Operator [SEL_356] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - Select Operator [SEL_349] (rows=1 width=16) + SHUFFLE [RS_355] + Select Operator [SEL_354] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_348] (rows=1 width=12) + Group By Operator [GBY_353] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + SHUFFLE [RS_352] PartitionCols:_col0 - Group By Operator [GBY_346] (rows=1 width=12) + Group By Operator [GBY_351] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_345] (rows=1 width=116) + Select Operator [SEL_350] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_344] (rows=1 width=116) + Group By Operator [GBY_349] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] @@ -200,42 +202,42 @@ Stage-0 Merge Join Operator [MERGEJOIN_277] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_334] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_340] + Group By Operator [GBY_339] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + SHUFFLE [RS_320] PartitionCols:_col0 - Group By Operator [GBY_298] (rows=25 width=4) + Group By Operator [GBY_317] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_295] (rows=50 width=12) + Select Operator [SEL_314] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_293] (rows=50 width=12) + Filter Operator [FIL_312] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_26] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_343] - Select Operator [SEL_342] (rows=1 width=8) - Filter Operator [FIL_341] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_348] + Select Operator [SEL_347] (rows=1 width=8) + Filter Operator [FIL_346] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_340] (rows=1 width=8) + Group By Operator [GBY_345] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_338] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_343] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_337] (rows=25 width=4) - Group By Operator [GBY_336] (rows=25 width=4) + Select Operator [SEL_342] (rows=25 width=4) + Group By Operator [GBY_341] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] + SHUFFLE [RS_321] PartitionCols:_col0 - Group By Operator [GBY_299] (rows=25 width=4) + Group By Operator [GBY_318] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_296] (rows=50 width=12) + Select Operator [SEL_315] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_293] + Please refer to the previous Filter Operator [FIL_312] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Select Operator [SEL_107] (rows=224732600 width=119) @@ -243,41 +245,41 @@ Stage-0 Merge Join Operator [MERGEJOIN_278] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col7","_col13"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_303] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_324] + Group By Operator [GBY_322] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] + SHUFFLE [RS_319] PartitionCols:_col0 - Group By Operator [GBY_297] (rows=25 width=4) + Group By Operator [GBY_316] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_294] (rows=50 width=12) + Select Operator [SEL_313] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_293] + Please refer to the previous Filter Operator [FIL_312] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_104] Merge Join Operator [MERGEJOIN_276] (rows=8989304 width=8) Conds:RS_101._col5=RS_102._col0(Inner),Output:["_col2","_col4","_col7"] - <-Reducer 31 [SIMPLE_EDGE] + <-Reducer 32 [SIMPLE_EDGE] SHUFFLE [RS_102] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_275] (rows=39720279 width=4) - Conds:RS_330._col1, _col2=RS_333._col0, _col1(Inner),Output:["_col0"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + Conds:RS_335._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col0"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_335] PartitionCols:_col1, _col2 - Select Operator [SEL_329] (rows=40000000 width=188) + Select Operator [SEL_334] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_328] (rows=40000000 width=188) + Filter Operator [FIL_333] (rows=40000000 width=188) predicate:(ca_county is not null and ca_state is not null) TableScan [TS_74] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] PartitionCols:_col0, _col1 - Select Operator [SEL_332] (rows=1704 width=184) + Select Operator [SEL_337] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_331] (rows=1704 width=184) + Filter Operator [FIL_336] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_77] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] @@ -285,11 +287,11 @@ Stage-0 SHUFFLE [RS_101] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_274] (rows=8989304 width=12) - Conds:RS_98._col0=RS_327._col0(Inner),Output:["_col2","_col4","_col5","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + Conds:RS_98._col0=RS_332._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] PartitionCols:_col0 - Select Operator [SEL_326] (rows=73049 width=8) + Select Operator [SEL_331] (rows=73049 width=8) Output:["_col0","_col1"] TableScan [TS_72] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] @@ -297,37 +299,11 @@ Stage-0 SHUFFLE [RS_98] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_273] (rows=8989304 width=8) - Conds:RS_95._col1=RS_325._col0(Inner),Output:["_col0","_col2","_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] - Select Operator [SEL_291] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_290] (rows=525327388 width=114) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_311] - Select Operator [SEL_310] (rows=1 width=8) - Filter Operator [FIL_309] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_308] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_307] - Group By Operator [GBY_306] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_304] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_303] + Conds:RS_95._col1=RS_303._col0(Inner),Output:["_col0","_col2","_col4","_col5"] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + SHUFFLE [RS_303] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=55046 width=8) + Group By Operator [GBY_302] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -335,13 +311,13 @@ Stage-0 Group By Operator [GBY_68] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col5, _col6 Merge Join Operator [MERGEJOIN_272] (rows=110092 width=8) - Conds:RS_64._col1=RS_323._col0(Inner),Output:["_col5","_col6"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + Conds:RS_64._col1=RS_301._col0(Inner),Output:["_col5","_col6"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] PartitionCols:_col0 - Select Operator [SEL_322] (rows=80000000 width=8) + Select Operator [SEL_300] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=80000000 width=8) + Filter Operator [FIL_299] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_55] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] @@ -349,13 +325,13 @@ Stage-0 SHUFFLE [RS_64] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_271] (rows=110092 width=0) - Conds:RS_61._col2=RS_320._col0(Inner),Output:["_col1"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Conds:RS_61._col2=RS_298._col0(Inner),Output:["_col1"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_319] (rows=453 width=4) + Select Operator [SEL_297] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_318] (rows=453 width=186) + Filter Operator [FIL_296] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) TableScan [TS_52] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] @@ -363,47 +339,84 @@ Stage-0 SHUFFLE [RS_61] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_270] (rows=11665117 width=7) - Conds:Union 19._col0=RS_314._col0(Inner),Output:["_col1","_col2"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_314] + Conds:Union 19._col0=RS_292._col0(Inner),Output:["_col1","_col2"] + <-Map 26 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_313] (rows=50 width=4) + Select Operator [SEL_291] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_312] (rows=50 width=12) + Filter Operator [FIL_290] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_49] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Union 19 [SIMPLE_EDGE] <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_359] + Reduce Output Operator [RS_364] PartitionCols:_col0 - Select Operator [SEL_358] (rows=285117831 width=11) + Select Operator [SEL_363] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_357] (rows=285117831 width=11) + Filter Operator [FIL_362] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_280] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_355] - Group By Operator [GBY_354] (rows=1 width=12) + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_360] + Group By Operator [GBY_359] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_317] - Group By Operator [GBY_316] (rows=1 width=12) + <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + Group By Operator [GBY_294] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_315] (rows=50 width=4) + Select Operator [SEL_293] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_313] - <-Map 24 [CONTAINS] vectorized - Reduce Output Operator [RS_362] + Please refer to the previous Select Operator [SEL_291] + <-Map 25 [CONTAINS] vectorized + Reduce Output Operator [RS_367] PartitionCols:_col0 - Select Operator [SEL_361] (rows=143930993 width=11) + Select Operator [SEL_366] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_360] (rows=143930993 width=11) + Filter Operator [FIL_365] (rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_285] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_356] - Please refer to the previous Group By Operator [GBY_354] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_361] + Please refer to the previous Group By Operator [GBY_359] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_95] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + Select Operator [SEL_310] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_309] (rows=525327388 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_96_customer_c_customer_sk_min) AND DynamicValue(RS_96_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_96_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_23] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_304] (rows=55046 width=4) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_302] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_330] + Select Operator [SEL_329] (rows=1 width=8) + Filter Operator [FIL_328] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_327] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_326] + Group By Operator [GBY_325] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_323] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_322] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index f0080995fb..d97f9df397 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Reducer 11 <- Union 10 (SIMPLE_EDGE) -Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Union 9 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=1 width=200) + File Output Operator [FS_146] + Limit [LIM_145] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_147] (rows=1 width=200) + Select Operator [SEL_144] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=1 width=200) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] + Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_133] PartitionCols:_col2 - Select Operator [SEL_143] (rows=1704 width=276) + Select Operator [SEL_132] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_142] (rows=1704 width=181) + Filter Operator [FIL_131] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_140] (rows=1 width=184) + Select Operator [SEL_129] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_139] (rows=1 width=192) + Filter Operator [FIL_128] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_138] (rows=3098 width=192) + Group By Operator [GBY_127] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 10 [SIMPLE_EDGE] + <-Union 9 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_170] + Reduce Output Operator [RS_167] PartitionCols:_col0 - Group By Operator [GBY_169] (rows=3098 width=192) + Group By Operator [GBY_166] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_168] (rows=1126 width=192) + Group By Operator [GBY_165] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_164] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=1126 width=192) + Group By Operator [GBY_163] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_165] (rows=2253 width=97) + Select Operator [SEL_162] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=2253 width=97) + Filter Operator [FIL_161] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_163] (rows=6761 width=97) + Group By Operator [GBY_160] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] + Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_158] (rows=40000000 width=93) + Select Operator [SEL_155] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=40000000 width=93) + Filter Operator [FIL_154] (rows=40000000 width=93) predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_161] (rows=26666667 width=4) + Select Operator [SEL_158] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_160] (rows=26666667 width=89) + Filter Operator [FIL_157] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_156] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_153] PartitionCols:_col0 - Group By Operator [GBY_155] (rows=3098 width=192) + Group By Operator [GBY_152] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_154] (rows=5071 width=192) + Group By Operator [GBY_151] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=70994 width=192) + Group By Operator [GBY_149] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_151] (rows=20000000 width=89) + Select Operator [SEL_148] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_150] (rows=20000000 width=89) + Filter Operator [FIL_147] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_129] - PartitionCols:_col0 - Select Operator [SEL_128] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_127] (rows=130 width=12) - predicate:((d_qoy = 1) and (d_year = 2002)) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_136] (rows=525329897 width=114) + Select Operator [SEL_137] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_135] (rows=525329897 width=114) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_136] (rows=525329897 width=114) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=130 width=4) + Select Operator [SEL_90] (rows=1 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_128] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_139] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index a029634671..a706e9459e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with my_customers as ( @@ -133,26 +133,28 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 15 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) -Map 21 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 17 <- Map 22 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) -Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 16 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) +Map 22 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 26 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 26 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 27 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 27 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -162,25 +164,25 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_349] - Limit [LIM_348] (rows=1 width=16) + File Output Operator [FS_351] + Limit [LIM_350] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_347] (rows=1 width=16) + Select Operator [SEL_349] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] - Select Operator [SEL_345] (rows=1 width=16) + SHUFFLE [RS_348] + Select Operator [SEL_347] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_344] (rows=1 width=12) + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + SHUFFLE [RS_345] PartitionCols:_col0 - Group By Operator [GBY_342] (rows=1 width=12) + Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_341] (rows=1 width=116) + Select Operator [SEL_343] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_340] (rows=1 width=116) + Group By Operator [GBY_342] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_119] @@ -195,82 +197,82 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Merge Join Operator [MERGEJOIN_270] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - Group By Operator [GBY_330] (rows=25 width=4) + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_333] + Group By Operator [GBY_332] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col0 - Group By Operator [GBY_316] (rows=25 width=4) + Group By Operator [GBY_318] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_313] (rows=50 width=12) + Select Operator [SEL_315] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_311] (rows=50 width=12) + Filter Operator [FIL_313] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_73] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Select Operator [SEL_338] (rows=1 width=8) - Filter Operator [FIL_337] (rows=1 width=8) + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Select Operator [SEL_340] (rows=1 width=8) + Filter Operator [FIL_339] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_336] (rows=1 width=8) + Group By Operator [GBY_338] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_334] (rows=1 width=8) + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + Group By Operator [GBY_336] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_333] (rows=25 width=4) - Group By Operator [GBY_332] (rows=25 width=4) + Select Operator [SEL_335] (rows=25 width=4) + Group By Operator [GBY_334] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) + Group By Operator [GBY_319] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) + Select Operator [SEL_316] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_311] + Please refer to the previous Filter Operator [FIL_313] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_113] Select Operator [SEL_108] (rows=224732600 width=119) Output:["_col0","_col4","_col11","_col13"] Merge Join Operator [MERGEJOIN_272] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_323] - Group By Operator [GBY_321] (rows=25 width=4) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_325] + Group By Operator [GBY_323] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Group By Operator [GBY_315] (rows=25 width=4) + Group By Operator [GBY_317] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_312] (rows=50 width=12) + Select Operator [SEL_314] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_311] + Please refer to the previous Filter Operator [FIL_313] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_105] Merge Join Operator [MERGEJOIN_271] (rows=8989304 width=8) Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - Select Operator [SEL_328] (rows=1 width=8) - Filter Operator [FIL_327] (rows=1 width=8) + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_331] + Select Operator [SEL_330] (rows=1 width=8) + Filter Operator [FIL_329] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_326] (rows=1 width=8) + Group By Operator [GBY_328] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - Group By Operator [GBY_324] (rows=1 width=8) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_327] + Group By Operator [GBY_326] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_322] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_321] + Select Operator [SEL_324] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_323] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_102] Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) @@ -279,134 +281,145 @@ Stage-0 SHUFFLE [RS_100] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_268] (rows=55046 width=4) - Conds:RS_69._col0=RS_310._col1(Inner),Output:["_col5"] + Conds:RS_69._col0=RS_304._col1(Inner),Output:["_col5"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_264] (rows=39720279 width=4) - Conds:RS_292._col1, _col2=RS_295._col0, _col1(Inner),Output:["_col0"] + Conds:RS_286._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col0"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + SHUFFLE [RS_286] PartitionCols:_col1, _col2 - Select Operator [SEL_291] (rows=40000000 width=188) + Select Operator [SEL_285] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_290] (rows=40000000 width=188) + Filter Operator [FIL_284] (rows=40000000 width=188) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) TableScan [TS_29] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] PartitionCols:_col0, _col1 - Select Operator [SEL_294] (rows=1704 width=184) + Select Operator [SEL_288] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_293] (rows=1704 width=184) + Filter Operator [FIL_287] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_32] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] PartitionCols:_col1 - Select Operator [SEL_309] (rows=55046 width=8) + Select Operator [SEL_303] (rows=55046 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_308] (rows=55046 width=8) + Group By Operator [GBY_302] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 19 [SIMPLE_EDGE] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1 Group By Operator [GBY_62] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col6, _col5 Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) - Conds:RS_58._col1=RS_307._col0(Inner),Output:["_col5","_col6"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Conds:RS_58._col1=RS_301._col0(Inner),Output:["_col5","_col6"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] PartitionCols:_col0 - Select Operator [SEL_306] (rows=80000000 width=8) + Select Operator [SEL_300] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_305] (rows=80000000 width=8) + Filter Operator [FIL_299] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_49] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) - Conds:RS_55._col2=RS_304._col0(Inner),Output:["_col1"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] + Conds:RS_55._col2=RS_298._col0(Inner),Output:["_col1"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_303] (rows=453 width=4) + Select Operator [SEL_297] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_302] (rows=453 width=186) + Filter Operator [FIL_296] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) TableScan [TS_46] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) - Conds:Union 16._col0=RS_298._col0(Inner),Output:["_col1","_col2"] - <-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_298] + Conds:Union 17._col0=RS_292._col0(Inner),Output:["_col1","_col2"] + <-Map 23 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_297] (rows=50 width=4) + Select Operator [SEL_291] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_296] (rows=50 width=12) + Filter Operator [FIL_290] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_43] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 16 [SIMPLE_EDGE] - <-Map 15 [CONTAINS] vectorized - Reduce Output Operator [RS_355] + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] vectorized + Reduce Output Operator [RS_357] PartitionCols:_col0 - Select Operator [SEL_354] (rows=285117831 width=11) + Select Operator [SEL_356] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_353] (rows=285117831 width=11) + Filter Operator [FIL_355] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_274] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_351] - Group By Operator [GBY_350] (rows=1 width=12) + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_353] + Group By Operator [GBY_352] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] - Group By Operator [GBY_300] (rows=1 width=12) + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_295] + Group By Operator [GBY_294] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_299] (rows=50 width=4) + Select Operator [SEL_293] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_297] - <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_358] + Please refer to the previous Select Operator [SEL_291] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_360] PartitionCols:_col0 - Select Operator [SEL_357] (rows=143930993 width=11) + Select Operator [SEL_359] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_356] (rows=143930993 width=11) + Filter Operator [FIL_358] (rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) TableScan [TS_279] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_352] - Please refer to the previous Group By Operator [GBY_350] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_354] + Please refer to the previous Group By Operator [GBY_352] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_99] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) - Conds:RS_286._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col4"] + Conds:RS_309._col0=RS_312._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_309] PartitionCols:_col0 - Select Operator [SEL_285] (rows=525327388 width=114) + Select Operator [SEL_308] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_284] (rows=525327388 width=114) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + Filter Operator [FIL_307] (rows=525327388 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_100_customer_c_customer_sk_min) AND DynamicValue(RS_100_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_100_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_23] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_181] (rows=55046 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_268] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_312] PartitionCols:_col0 - Select Operator [SEL_288] (rows=73049 width=8) + Select Operator [SEL_311] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_287] (rows=73049 width=8) + Filter Operator [FIL_310] (rows=73049 width=8) predicate:d_date_sk is not null TableScan [TS_26] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out index da49d2edb3..9eb50396f9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Reducer 11 <- Union 10 (SIMPLE_EDGE) -Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Union 9 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=1 width=200) + File Output Operator [FS_146] + Limit [LIM_145] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_147] (rows=1 width=200) + Select Operator [SEL_144] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=1 width=200) + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] + Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_133] PartitionCols:_col2 - Select Operator [SEL_143] (rows=1704 width=276) + Select Operator [SEL_132] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_142] (rows=1704 width=181) + Filter Operator [FIL_131] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_140] (rows=1 width=184) + Select Operator [SEL_129] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_139] (rows=1 width=192) + Filter Operator [FIL_128] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_138] (rows=3098 width=192) + Group By Operator [GBY_127] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 10 [SIMPLE_EDGE] + <-Union 9 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_170] + Reduce Output Operator [RS_167] PartitionCols:_col0 - Group By Operator [GBY_169] (rows=3098 width=192) + Group By Operator [GBY_166] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_168] (rows=1126 width=192) + Group By Operator [GBY_165] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_164] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=1126 width=192) + Group By Operator [GBY_163] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_165] (rows=2253 width=97) + Select Operator [SEL_162] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=2253 width=97) + Filter Operator [FIL_161] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_163] (rows=6761 width=97) + Group By Operator [GBY_160] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] + Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_158] (rows=40000000 width=93) + Select Operator [SEL_155] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=40000000 width=93) + Filter Operator [FIL_154] (rows=40000000 width=93) predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_161] (rows=26666667 width=4) + Select Operator [SEL_158] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_160] (rows=26666667 width=89) + Filter Operator [FIL_157] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_156] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_153] PartitionCols:_col0 - Group By Operator [GBY_155] (rows=3098 width=192) + Group By Operator [GBY_152] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_154] (rows=5071 width=192) + Group By Operator [GBY_151] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=70994 width=192) + Group By Operator [GBY_149] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_151] (rows=20000000 width=89) + Select Operator [SEL_148] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_150] (rows=20000000 width=89) + Filter Operator [FIL_147] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_129] - PartitionCols:_col0 - Select Operator [SEL_128] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_127] (rows=130 width=12) - predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_136] (rows=525329897 width=114) + Select Operator [SEL_137] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_135] (rows=525329897 width=114) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_136] (rows=525329897 width=114) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=130 width=4) + Select Operator [SEL_90] (rows=1 width=8) Output:["_col0"] - Please refer to the previous Select Operator [SEL_128] + Please refer to the previous Merge Join Operator [MERGEJOIN_117] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_139] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"]