diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 76aa39f..abad9c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -172,7 +172,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
       // unlink connection between FS and its parent
       fsParent = fsOp.getParentOperators().get(0);
-      fsParent.getChildOperators().clear();
+      fsParent.getChildOperators().remove(fsOp);
 
       DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx();
       int numBuckets = destTable.getNumBuckets();
@@ -364,7 +364,7 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) {
           rsChild.getSchema().getSignature().size()) {
         return false;
       }
-      rsParent.getChildOperators().clear();
+      rsParent.getChildOperators().remove(rsToRemove);
       rsParent.getChildOperators().add(rsGrandChild);
       rsGrandChild.getParentOperators().clear();
       rsGrandChild.getParentOperators().add(rsParent);
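The clear()-to-remove() change above is what makes multi-insert plans safe: the FileSink's parent may feed several branches, and clearing its whole child list silently detached the sibling branches, whereas remove(fsOp) unlinks only the edge being rewired. A minimal sketch of the difference, using a hypothetical Op class rather than Hive's Operator API:

import java.util.ArrayList;
import java.util.List;

class Op {
  final String name;
  final List<Op> parents = new ArrayList<>();
  final List<Op> children = new ArrayList<>();
  Op(String name) { this.name = name; }
  void addChild(Op c) { children.add(c); c.parents.add(this); }
}

public class UnlinkDemo {
  public static void main(String[] args) {
    Op sel = new Op("SEL");          // shared parent, e.g. the SELECT feeding both inserts
    Op fs0 = new Op("FS[part4_0]");  // FileSink branch being rewritten
    Op fs1 = new Op("FS[part4_1]");  // sibling FileSink branch
    sel.addChild(fs0);
    sel.addChild(fs1);

    // Buggy unlink: sel.children.clear() would orphan fs1 as well.
    // Fixed unlink: detach only the branch under rewrite.
    sel.children.remove(fs0);
    System.out.println(sel.children.size()); // 1: fs1 is still wired to sel
  }
}

The same reasoning applies to the removeRSInsertedByEnforceBucketing hunk: only rsToRemove should be detached before rsGrandChild is spliced onto rsParent.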
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 701bde4..364896c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -530,6 +530,21 @@ protected Integer checkNumReducer(int creduce, int preduce) {
       return 0;
     }
 
+    // The path between cRS and pRS is expected to contain only Select operators,
+    // i.e. the sequence must be pRS-SEL*-cRS.
+    // Ensure that no SEL in that path branches.
+    protected boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+      Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+      while (parent != pRS) {
+        assert parent.getNumParent() == 1;
+        if (parent.getChildOperators().size() > 1) {
+          return false;
+        }
+        parent = parent.getParentOperators().get(0);
+      }
+      return true;
+    }
+
     protected boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS,
         ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
       assert cRS.getNumParent() == 1;
@@ -539,15 +554,8 @@ protected boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS
       List<ExprNodeDesc> cKeys = cConf.getKeyCols();
       List<ExprNodeDesc> pKeys = pConf.getKeyCols();
 
-      // Check that in the path between cRS and pRS, there are only Select operators
-      // i.e. the sequence must be pRS-SEL*-cRS
-      Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
-      while (parent != pRS) {
-        assert parent.getNumParent() == 1;
-        if (!(parent instanceof SelectOperator)) {
-          return false;
-        }
-        parent = parent.getParentOperators().get(0);
+      if (!checkSelectSingleBranchOnly(cRS, pRS)) {
+        return false;
       }
 
       // If child keys are null or empty, we bail out
@@ -619,7 +627,7 @@ protected boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS
 
       // Replace pRS with cRS and remove operator sequence from pRS to cRS
       // Recall that the sequence must be pRS-SEL*-cRS
-      parent = cRS.getParentOperators().get(0);
+      Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
       while (parent != pRS) {
         dedupCtx.addRemovedOperator(parent);
         parent = parent.getParentOperators().get(0);
@@ -743,6 +751,9 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
           CorrelationUtilities.findPossibleParent(
               cRS, ReduceSinkOperator.class, dedupCtx.trustScript());
       if (pRS != null) {
+        if (!checkSelectSingleBranchOnly(cRS, pRS)) {
+          return false;
+        }
         // Try extended deduplication
         if (aggressiveDedup(cRS, pRS, dedupCtx)) {
           return true;
diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
index d07f016..036cc2f 100644
--- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
+++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
@@ -208,3 +208,45 @@ insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from ov
 insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo";
 
 select sum(hash(*)) from over1k_part3;
+
+
+-- verify multiple branches in SDPO
+set hive.optimize.sort.dynamic.partition=true;
+set hive.optimize.reducededuplication.min.reducer=1;
+-- CBO will remove the intermediate ORDER BY
+set hive.cbo.enable=true;
+
+create table over1k_part4_0(i int)
+    partitioned by (s string);
+create table over1k_part4_1(i int)
+    partitioned by (s string);
+
+EXPLAIN
+WITH CTE AS (
+  select i, s from over1k where s like 'bob%'
+)
+FROM (
+  select * from CTE where i > 1 ORDER BY s
+) src1k
+insert overwrite table over1k_part4_0 partition(s)
+select i+1, s
+insert overwrite table over1k_part4_1 partition(s)
+select i+0, s
+;
+
+EXPLAIN
+WITH CTE AS (
+  select i, s from over1k where s like 'bob%'
+)
+FROM (
+  select * from CTE where i > 1 ORDER BY s
+) src1k
+insert overwrite table over1k_part4_0 partition(s)
+select i+1, s
+insert overwrite table over1k_part4_1 partition(s)
+select i+0, s
+;
+
+select count(1) from over1k_part4_0;
+select count(1) from over1k_part4_1;
+
diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q
index d429a1f..fc3974b 100644
--- a/ql/src/test/queries/clientpositive/reducesink_dedup.q
+++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -1,4 +1,11 @@
 select p_name from 
 (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
-;
\ No newline at end of file
+;
+
+create temporary table d1 (key int);
+create temporary table d2 (key int);
+
+explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key;
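checkSelectSingleBranchOnly is the guard exercised by the multi-insert tests above: if any SEL between cRS and pRS feeds a second branch, merging the two ReduceSinks would redirect rows away from that branch, so both the extended and the regular deduplication must back off. A standalone sketch of the walk, with a hypothetical Node class in place of Hive's operator graph:

import java.util.ArrayList;
import java.util.List;

class Node {
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();
}

public class BranchCheckDemo {
  static boolean singleBranchOnly(Node cRS, Node pRS) {
    Node cur = cRS.parents.get(0);
    while (cur != pRS) {
      if (cur.children.size() > 1) {
        return false;           // an operator in between feeds another branch
      }
      cur = cur.parents.get(0);  // path is linear (pRS-SEL*-cRS), so one parent
    }
    return true;
  }

  public static void main(String[] args) {
    Node pRS = new Node(), sel = new Node(), cRS = new Node(), other = new Node();
    pRS.children.add(sel);    sel.parents.add(pRS);
    sel.children.add(cRS);    cRS.parents.add(sel);
    sel.children.add(other);  other.parents.add(sel);  // second insert branch
    System.out.println(singleBranchOnly(cRS, pRS));    // false: dedup must back off
  }
}

diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
index 1dc9ed5..97c3dc1 100644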
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -3269,3 +3269,323 @@ POSTHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680 POSTHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599 #### A masked pattern was here #### 17814641134 +PREHOOK: query: create table over1k_part4_0(i int) + partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_0 +POSTHOOK: query: create table over1k_part4_0(i int) + partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_0 +PREHOOK: query: create table over1k_part4_1(i int) + partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: create table over1k_part4_1(i int) + partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_1 +PREHOOK: query: EXPLAIN +WITH CTE AS ( + select i, s from over1k where s like 'bob%' +) +FROM ( + select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +WITH CTE AS ( + select i, s from over1k where s like 'bob%' +) +FROM ( + select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 1025 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), s (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 + 1) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Select Operator + expressions: (_col0 + 0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: EXPLAIN +WITH CTE AS ( + select i, s from over1k where s like 'bob%' +) +FROM ( + select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +WITH CTE AS ( + select i, s from over1k where s like 'bob%' +) +FROM ( + select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 1025 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), s (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 + 1) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Select Operator + expressions: (_col0 + 0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 170 Data size: 17685 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: select count(1) from over1k_part4_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_0 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_0 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(1) from over1k_part4_1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@over1k_part4_1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_1 +#### A masked pattern was here #### +0 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index bb42e45..f2b99bf 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -4328,10 +4328,10 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_11] (rows=26 width=223) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col0"}] - Group By Operator [GBY_8] (rows=26 width=223) - Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + Select Operator [SEL_10] (rows=26 width=223) + Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] + SHUFFLE [RS_9] PartitionCols:_col0 Group By Operator [GBY_6] (rows=26 width=223) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index fbaf1e6..3af9b67 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -927,16 +927,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 0a6d87a..b7586cf 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -1274,10 +1274,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1286,11 +1287,10 @@ STAGE PLANS: notVectorizedReason: PTF Operator (PTF) not supported vectorized: false Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial 
+ Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -1325,14 +1325,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index 7f1d67b..fd039b3 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -2349,8 +2349,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2364,9 +2366,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2378,13 +2406,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2399,19 +2430,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2458,8 +2476,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2473,9 +2493,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + 
Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2487,13 +2533,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2508,19 +2557,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2567,8 +2603,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2582,9 +2620,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2596,13 +2660,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2617,19 +2684,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2676,8 +2730,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2691,9 +2747,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2705,13 +2787,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2726,19 +2811,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2793,8 +2865,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2838,9 +2912,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce 
partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2852,13 +2952,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2873,19 +2976,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3013,8 +3103,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 
(PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3058,9 +3150,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3072,13 +3190,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3093,19 +3214,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3233,8 
+3341,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3278,9 +3388,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3292,13 +3428,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3313,19 +3452,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE 
Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -3453,8 +3579,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3498,9 +3626,35 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3512,13 +3666,16 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3533,19 +3690,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: 
string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out b/ql/src/test/results/clientpositive/spark/ptf.q.out index 2e31fbd..e9bdd6e 100644 --- a/ql/src/test/results/clientpositive/spark/ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/ptf.q.out @@ -852,7 +852,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -896,17 +896,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 3 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -941,14 +941,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 4972677..5b239b8 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -1184,7 +1184,7 @@ STAGE PLANS: Spark 
Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1257,10 +1257,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 3 Reduce Vectorization: enabled: true @@ -1268,11 +1269,10 @@ STAGE PLANS: notVectorizedReason: PTF Operator (PTF) not supported vectorized: false Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -1307,14 +1307,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
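
In the ptf.q.out and vectorized_ptf.q.out updates above, the plan drops the mergepartial Group By that used to sit in front of the PTF Operator: the feeding ReduceSink now sorts on (_col0, _col1) only and ships _col2 in VALUE, and a pass-through Select reassembles the row on the reduce side; the row estimate accordingly stays at 26 instead of being halved to 13 by the Group By. A simplified sketch of that reassembly (hypothetical selectRow helper, not Hive's internals):

import java.util.Arrays;
import java.util.List;

public class ReassembleDemo {
  // Mirrors "expressions: KEY.reducesinkkey0, KEY.reducesinkkey1, VALUE._col0":
  // KEY carries the partition/sort columns, VALUE carries the remaining column.
  static List<Object> selectRow(List<Object> key, List<Object> value) {
    return Arrays.asList(key.get(0), key.get(1), value.get(0));
  }

  public static void main(String[] args) {
    List<Object> key = Arrays.<Object>asList("Manufacturer#1", "almond antique");
    List<Object> value = Arrays.<Object>asList(2);
    // Prints [Manufacturer#1, almond antique, 2]: same row, no re-grouping needed.
    System.out.println(selectRow(key, value));
  }
}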