diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 18e4f7f02c..0833deb81a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -635,6 +635,7 @@ minillaplocal.query.files=\ ptf_streaming.q,\ runtime_stats_merge.q,\ quotedid_smb.q,\ + reducesink_dedup.q,\ resourceplan.q,\ results_cache_1.q,\ results_cache_2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 6fd1093e6d..64f86e2948 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -501,10 +501,9 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, order = 0; } } - for (int i = 0; i < numPartAndBuck; i++) { + for (int i = 0; i < keyColsPosInVal.size(); i++) { newSortOrder.add(order); } - newSortOrder.addAll(sortOrder); String orderStr = ""; for (Integer i : newSortOrder) { @@ -525,10 +524,9 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, nullOrder = 1; } } - for (int i = 0; i < numPartAndBuck; i++) { + for (int i = 0; i < keyColsPosInVal.size(); i++) { newSortNullOrder.add(nullOrder); } - newSortNullOrder.addAll(sortNullOrder); String nullOrderStr = ""; for (Integer i : newSortNullOrder) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java index 6fcd8d2a31..c553dcaa88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java @@ -387,7 +387,7 @@ protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOp SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames()); Operator parent = getSingleParent(childRS); - parent.getChildOperators().clear(); + parent.removeChild(childRS); SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild( select, new RowSchema(inputRS.getSignature()), parent); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java index 7ccd4a3725..6919da8586 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java @@ -475,6 +475,40 @@ protected static Integer checkNumReducer(int creduce, int preduce) { return 0; } + // Check that in the path between cRS and pRS, there are only Select operators + // i.e. 
the sequence must be pRS-SEL*-cRS
+  // and ensure that no SEL in the path branches
+  protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+    Operator parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (!(parent instanceof SelectOperator)) {
+        return false;
+      }
+      if (parent.getChildOperators().size() > 1) {
+        return false;
+      }
+
+      parent = parent.getParentOperators().get(0);
+    }
+    return true;
+  }
+
+  // Check that there is only a single branch in the path between cRS and pRS
+  // i.e. the sequence must be pRS-Op*-cRS
+  protected static boolean checkSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+    Operator parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (parent.getChildOperators().size() > 1) {
+        return false;
+      }
+
+      parent = parent.getParentOperators().get(0);
+    }
+    return true;
+  }
+
   protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS,
       ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
     assert cRS.getNumParent() == 1;
@@ -484,15 +518,8 @@ protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOpera
     List cKeys = cConf.getKeyCols();
     List pKeys = pConf.getKeyCols();
 
-    // Check that in the path between cRS and pRS, there are only Select operators
-    // i.e. the sequence must be pRS-SEL*-cRS
-    Operator parent = cRS.getParentOperators().get(0);
-    while (parent != pRS) {
-      assert parent.getNumParent() == 1;
-      if (!(parent instanceof SelectOperator)) {
-        return false;
-      }
-      parent = parent.getParentOperators().get(0);
+    if (!checkSelectSingleBranchOnly(cRS, pRS)) {
+      return false;
     }
 
     // If child keys are null or empty, we bail out
@@ -564,7 +591,7 @@ protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOpera
 
     // Replace pRS with cRS and remove operator sequence from pRS to cRS
     // Recall that the sequence must be pRS-SEL*-cRS
-    parent = cRS.getParentOperators().get(0);
+    Operator parent = cRS.getParentOperators().get(0);
     while (parent != pRS) {
       dedupCtx.addRemovedOperator(parent);
       parent = parent.getParentOperators().get(0);
diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q
index 352a558bea..b7f9a014d5 100644
--- a/ql/src/test/queries/clientpositive/reducesink_dedup.q
+++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -1,5 +1,13 @@
 --! qt:dataset:part
-select p_name
+--! 
qt:dataset:src +select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 -; \ No newline at end of file +; + +create temporary table d1 (key int); +create temporary table d2 (key int); + +explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/bucket2.q.out b/ql/src/test/results/clientpositive/bucket2.q.out index aec4ceac3f..98113d83ed 100644 --- a/ql/src/test/results/clientpositive/bucket2.q.out +++ b/ql/src/test/results/clientpositive/bucket2.q.out @@ -150,30 +150,30 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 columns.types struct:struct - escape.delim \ + escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index fc782b4590..8cb9dd1d85 100644 --- a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -43,22 +43,48 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - sort order: ++ + sort order: + Map-reduce partition columns: _col2 (type: string) - value expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) + expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 
(type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.implicit_cast_during_insert + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.implicit_cast_during_insert Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index 53337fe356..6482202b7e 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -170,6 +170,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -186,13 +188,61 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col4 (type: tinyint), _col0 (type: smallint) - sort order: ++ - Map-reduce partition columns: _col4 (type: tinyint) - value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 
(type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col4 (type: tinyint), _col0 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col4 (type: tinyint) + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -616,6 +666,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -632,13 +684,61 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col4 (type: tinyint), _col0 (type: smallint) - sort order: ++ - Map-reduce partition columns: _col4 (type: tinyint) - value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col4 (type: tinyint), _col0 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col4 (type: tinyint) + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1566,6 +1666,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1582,13 +1684,61 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col4 (type: tinyint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: tinyint) - value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce 
Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col4 (type: tinyint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index cb3704f549..01d7a12408 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -1597,6 +1597,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1613,13 +1615,61 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key 
expressions: _col4 (type: tinyint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: tinyint) - value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col4 (type: tinyint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index 30074abaf2..42d13b5a62 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -87,6 +87,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +113,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) @@ -122,15 +124,62 @@ STAGE PLANS: expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: float), _col1 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.ss_part + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.ss_part Stage: Stage-2 Dependency Collection @@ -334,7 +383,9 @@ STAGE PLANS: Tez #### A masked 
pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -351,13 +402,61 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) - sort order: ++ + sort order: Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: float), _col1 (type: float) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: float), _col1 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -576,6 +675,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -626,20 +726,35 @@ STAGE PLANS: Group By Operator aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) - mode: complete + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -843,7 +958,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -860,8 +976,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + + sort order: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) @@ -889,20 +1004,35 @@ STAGE PLANS: Group By Operator aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) - mode: complete + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1162,6 +1292,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1212,20 +1343,35 @@ STAGE PLANS: Group By Operator aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) - mode: complete + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1429,7 +1575,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,8 +1593,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + + sort order: Map-reduce partition columns: 
_col2 (type: int) Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) @@ -1475,20 +1621,35 @@ STAGE PLANS: Group By Operator aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') keys: ss_sold_date_sk (type: int) - mode: complete + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index 6a3ded3376..59d5dac074 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -105,16 +105,16 @@ STAGE PLANS: expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator + Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: 
struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) - Reducer 4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out new file mode 100644 index 0000000000..fbc75ce516 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out @@ -0,0 +1,163 @@ +PREHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse lavender yellow +almond antique salmon chartreuse burlywood +almond aquamarine burnished black steel +almond aquamarine pink moccasin thistle +almond antique violet chocolate turquoise +almond antique violet turquoise frosted +almond aquamarine midnight light salmon +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond antique chartreuse khaki white +almond antique forest lavender goldenrod +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique gainsboro frosted violet +almond antique violet mint lemon +almond aquamarine floral ivory bisque +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond antique blue firebrick mint +almond antique medium spring khaki +almond antique sky peru orange +almond aquamarine dodger light gainsboro +almond azure blanched chiffon midnight +PREHOOK: query: create temporary table d1 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d1 +POSTHOOK: query: create temporary table d1 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d1 +PREHOOK: query: create temporary table d2 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d2 +POSTHOOK: query: create temporary table d2 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d2 +PREHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@d1 +PREHOOK: Output: default@d2 +POSTHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@d1 +POSTHOOK: Output: default@d2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + + Stage: Stage-5 + Stats Work + Basic Stats Work: + diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 7f2cc85392..167873f347 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -1050,7 +1050,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: 
vectorized - Map 8 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -1135,11 +1135,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 115000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') - mode: hash + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 @@ -2880,7 +2880,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 7 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -2954,11 +2954,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 69000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator @@ -3001,7 +3001,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -9777,7 +9777,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Map 7 + Map 7 Map Operator Tree: TableScan alias: t2_n58 @@ -9813,33 +9813,33 @@ STAGE PLANS: aggregations: count(1) keys: _col0 (type: string) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t3_n22 - Select Operator - expressions: _col0 (type: string), _col1 (type: int) - outputColumnNames: key, cnt + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3_n22 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: 
key, cnt
                  Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: struct), _col1 (type: struct)
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
                Select Operator
                  expressions: _col1 (type: string)
                  outputColumnNames: _col1
@@ -9856,7 +9856,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
-        Reducer 4 
+        Reducer 4
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
@@ -9871,7 +9871,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5 
+        Reducer 5
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
@@ -9905,7 +9905,7 @@ STAGE PLANS:
                    sort order: 
                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: struct), _col1 (type: struct)
-        Reducer 6 
+        Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out
index fc8afcf193..b66b32ca26 100644
--- a/ql/src/test/results/clientpositive/merge3.q.out
+++ b/ql/src/test/results/clientpositive/merge3.q.out
@@ -5023,11 +5023,12 @@ STAGE PLANS:
            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: _col2 (type: string)
-              null sort order: aa
-              sort order: ++
+              null sort order: a
+              sort order: +
              Map-reduce partition columns: _col2 (type: string)
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
              tag: -1
-              value expressions: _col0 (type: string), _col1 (type: string)
+              value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
              auto parallelism: false
      Execution mode: vectorized
      Path -> Alias:
@@ -5135,38 +5136,78 @@ STAGE PLANS:
      Needs Tagging: false
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
          outputColumnNames: _col0, _col1, _col2
-          File Output Operator
-            compressed: false
-            GlobalTableId: 1
-#### A masked pattern was here ####
-            Dp Sort State: PARTITION_SORTED
-            NumFilesPerFileSink: 1
+          Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds
            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
 #### A masked pattern was here ####
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                properties:
-                  bucket_count -1
-                  column.name.delimiter ,
-                  columns key,value
-                  columns.comments 
-                  columns.types string:string
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-                  name default.merge_src_part2
-                  partition_columns ds
-                  partition_columns.types string
-                  serialization.ddl struct merge_src_part2 { string key, string value}
-                  serialization.format 1
-                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types struct:struct:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
 #### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.merge_src_part2
-          TotalFiles: 1
-          GatherStats: true
-          MultiFileSpray: false
+              Dp Sort State: PARTITION_SORTED
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.merge_src_part2
+                    partition_columns ds
+                    partition_columns.types string
+                    serialization.ddl struct merge_src_part2 { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.merge_src_part2
+              TotalFiles: 1
+              GatherStats: true
+              MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
index b89df52f96..2b068ac424 100644
--- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out
+++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: select p_name 
+PREHOOK: query: select p_name
 from (select p_name
 from part distribute by 1 sort by 1) p
 distribute by 1 sort by 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
#### A masked pattern was here ####
-POSTHOOK: query: select p_name 
+POSTHOOK: query: select p_name
 from (select p_name
 from part distribute by 1 sort by 1) p
 distribute by 1 sort by 1
 POSTHOOK: type: QUERY
@@ -36,3 +36,201 @@ almond antique medium spring khaki
 almond antique sky peru orange
 almond aquamarine dodger light gainsboro
 almond azure blanched chiffon midnight
+PREHOOK: query: create temporary table d1 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d1
+POSTHOOK: query: create temporary table d1 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d1
+PREHOOK: query: create temporary table d2 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d2
+POSTHOOK: query: create temporary table d2 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d2
+PREHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@d1
+PREHOOK: Output: default@d2
+POSTHOOK: query: explain from (select key from src cluster by key) a
+  insert overwrite table d1 select a.key
+  insert overwrite table d2 select a.key cluster by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@d1
+POSTHOOK: Output: default@d2
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
+  Stage-1 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: UDFToInteger(_col0) (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.d1
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll')
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: UDFToInteger(_col0) (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.d2
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll')
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.d1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.d2
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.d2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+