diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index ff35e8c082..4cc5efc8f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -187,8 +187,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // unlink connection between FS and its parent fsParent = fsOp.getParentOperators().get(0); - fsParent.getChildOperators().clear(); - + fsParent.getChildOperators().remove(fsOp); // if enforce bucketing/sorting is disabled numBuckets will not be set. // set the number of buckets here to ensure creation of empty buckets @@ -395,7 +394,7 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { return false; } } - rsParent.getChildOperators().clear(); + rsParent.getChildOperators().remove(rsToRemove); rsParent.getChildOperators().add(rsGrandChild); rsGrandChild.getParentOperators().clear(); rsGrandChild.getParentOperators().add(rsParent); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java index 7ccd4a3725..1c6df23bf0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java @@ -475,6 +475,20 @@ protected static Integer checkNumReducer(int creduce, int preduce) { return 0; } + // Check that in the path between cRS and pRS, there are only Select operators +// i.e. 
the sequence must be pRS-SEL*-cRS +// ensure SEL does not branch +protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) { + Operator parent = cRS.getParentOperators().get(0); + while (parent != pRS) { + assert parent.getNumParent() == 1; + if (!(parent instanceof SelectOperator) || parent.getChildOperators().size() > 1) { + return false; + } + parent = parent.getParentOperators().get(0); + } + return true; + } protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { assert cRS.getNumParent() == 1; @@ -484,16 +498,9 @@ protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOpera List cKeys = cConf.getKeyCols(); List pKeys = pConf.getKeyCols(); - // Check that in the path between cRS and pRS, there are only Select operators - // i.e. the sequence must be pRS-SEL*-cRS - Operator parent = cRS.getParentOperators().get(0); - while (parent != pRS) { - assert parent.getNumParent() == 1; - if (!(parent instanceof SelectOperator)) { - return false; + if (!checkSelectSingleBranchOnly(cRS, pRS)) { + return false; } - parent = parent.getParentOperators().get(0); - } // If child keys are null or empty, we bail out if (cKeys == null || cKeys.isEmpty()) { @@ -564,7 +571,7 @@ protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOpera // Replace pRS with cRS and remove operator sequence from pRS to cRS // Recall that the sequence must be pRS-SEL*-cRS - parent = cRS.getParentOperators().get(0); + Operator parent = cRS.getParentOperators().get(0); while (parent != pRS) { dedupCtx.addRemovedOperator(parent); parent = parent.getParentOperators().get(0); diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index dbeb874019..03bf10bc5e 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ 
b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -245,4 +245,36 @@ explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b, set hive.optimize.sort.dynamic.partition.threshold=1; explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10; + +create table over1k_part4_0(i int) partitioned by (s string); +create table over1k_part4_1(i int) partitioned by (s string); + +EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +; + +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +; + +select count(1) from over1k_part4_0; +select count(1) from over1k_part4_1; + drop table over1k_n3; diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q index 352a558bea..5e0282ebdc 100644 --- a/ql/src/test/queries/clientpositive/reducesink_dedup.q +++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q @@ -1,5 +1,13 @@ --! qt:dataset:part -select p_name +--! 
qt:dataset:src +select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 -; \ No newline at end of file +; + +create temporary table d1 (key int); +create temporary table d2 (key int); + +explain from (select key from src cluster by key) a +insert overwrite table d1 select a.key +insert overwrite table d2 select a.key cluster by a.key; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index bf29a112fc..284f66afb8 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -127,6 +127,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -142,6 +143,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -150,6 +167,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -209,6 +246,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -243,12 +281,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), 
_col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -535,6 +609,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -550,6 +625,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -558,6 +649,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -617,6 +728,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -651,12 +763,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1447,6 +1595,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1463,13 +1613,61 @@ 
STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: tinyint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: tinyint) - value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 
(type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col4 (type: tinyint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: tinyint) + value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1529,6 +1727,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1564,12 +1763,48 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 
(type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2636,6 +2871,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2651,6 +2887,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2661,17 +2913,37 @@ STAGE PLANS: Reducer 2 Execution mode: llap Reduce Operator Tree: - Select 
Operator - expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int) + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part3 Stage: Stage-2 @@ -2718,6 +2990,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2733,6 +3006,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27Y (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2741,6 +3030,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2800,6 +3109,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2815,6 +3125,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i 
(type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2823,6 +3149,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2882,6 +3228,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + 
Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2897,6 +3244,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27Y (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2905,6 +3268,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2964,6 +3347,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2979,6 +3363,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2987,6 +3387,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3046,6 +3466,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3061,6 +3482,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), 27Y (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data 
size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -3069,6 +3506,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3626,6 +4083,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3641,6 +4099,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 352 Data size: 39072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -3649,6 +4123,26 @@ STAGE PLANS: Execution 
mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3708,6 +4202,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3742,12 +4237,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds 
(type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3912,6 +4443,7 @@ STAGE PLANS: 
Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3946,12 +4478,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3991,6 +4559,452 @@ STAGE PLANS: Column Types: smallint, int, bigint, float Table: default.over1k_part +PREHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_0 +POSTHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_0 +PREHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_1 +PREHOOK: query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: 
query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_n3 + filterExpr: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 1049 Data size: 105949 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((i > 1) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), s (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: i, s + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Select Operator + expressions: (_col0 + 0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: i, s + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: 
_col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.over1k_part4_0 + + Stage: Stage-1 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.over1k_part4_1 + +PREHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite 
table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +POSTHOOK: Output: default@over1k_part4_0@s=bob brown +POSTHOOK: Output: default@over1k_part4_0@s=bob carson +POSTHOOK: Output: default@over1k_part4_0@s=bob davidson +POSTHOOK: Output: default@over1k_part4_0@s=bob ellison +POSTHOOK: Output: default@over1k_part4_0@s=bob falkner +POSTHOOK: Output: default@over1k_part4_0@s=bob garcia +POSTHOOK: Output: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_0@s=bob king +POSTHOOK: Output: default@over1k_part4_0@s=bob laertes +POSTHOOK: Output: default@over1k_part4_0@s=bob miller +POSTHOOK: Output: default@over1k_part4_0@s=bob ovid +POSTHOOK: Output: default@over1k_part4_0@s=bob polk +POSTHOOK: Output: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Output: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_0@s=bob van buren +POSTHOOK: Output: default@over1k_part4_0@s=bob white +POSTHOOK: Output: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_0@s=bob young +POSTHOOK: Output: default@over1k_part4_0@s=bob zipper +POSTHOOK: Output: default@over1k_part4_1@s=bob brown +POSTHOOK: Output: default@over1k_part4_1@s=bob carson +POSTHOOK: Output: default@over1k_part4_1@s=bob davidson +POSTHOOK: Output: default@over1k_part4_1@s=bob ellison +POSTHOOK: Output: default@over1k_part4_1@s=bob falkner +POSTHOOK: Output: default@over1k_part4_1@s=bob garcia +POSTHOOK: Output: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_1@s=bob king +POSTHOOK: Output: default@over1k_part4_1@s=bob laertes +POSTHOOK: Output: default@over1k_part4_1@s=bob miller +POSTHOOK: Output: default@over1k_part4_1@s=bob ovid +POSTHOOK: Output: default@over1k_part4_1@s=bob polk +POSTHOOK: Output: default@over1k_part4_1@s=bob quirinius 
+POSTHOOK: Output: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_1@s=bob van buren +POSTHOOK: Output: default@over1k_part4_1@s=bob white +POSTHOOK: Output: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_1@s=bob young +POSTHOOK: Output: default@over1k_part4_1@s=bob zipper +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob garcia).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ovid).i EXPRESSION 
[(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob zipper).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob garcia).i EXPRESSION 
[(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ovid).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob zipper).i EXPRESSION 
[(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +PREHOOK: query: select count(1) from over1k_part4_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_0 +PREHOOK: Input: default@over1k_part4_0@s=bob brown +PREHOOK: Input: default@over1k_part4_0@s=bob carson +PREHOOK: Input: default@over1k_part4_0@s=bob davidson +PREHOOK: Input: default@over1k_part4_0@s=bob ellison +PREHOOK: Input: default@over1k_part4_0@s=bob falkner +PREHOOK: Input: default@over1k_part4_0@s=bob garcia +PREHOOK: Input: default@over1k_part4_0@s=bob hernandez +PREHOOK: Input: default@over1k_part4_0@s=bob ichabod +PREHOOK: Input: default@over1k_part4_0@s=bob king +PREHOOK: Input: default@over1k_part4_0@s=bob laertes +PREHOOK: Input: default@over1k_part4_0@s=bob miller +PREHOOK: Input: default@over1k_part4_0@s=bob ovid +PREHOOK: Input: default@over1k_part4_0@s=bob polk +PREHOOK: Input: default@over1k_part4_0@s=bob quirinius +PREHOOK: Input: default@over1k_part4_0@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_0@s=bob van buren +PREHOOK: Input: default@over1k_part4_0@s=bob white +PREHOOK: Input: default@over1k_part4_0@s=bob xylophone +PREHOOK: Input: default@over1k_part4_0@s=bob young +PREHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_0 +POSTHOOK: Input: default@over1k_part4_0@s=bob brown +POSTHOOK: Input: default@over1k_part4_0@s=bob carson +POSTHOOK: Input: default@over1k_part4_0@s=bob davidson +POSTHOOK: Input: default@over1k_part4_0@s=bob ellison +POSTHOOK: Input: default@over1k_part4_0@s=bob falkner +POSTHOOK: Input: default@over1k_part4_0@s=bob garcia +POSTHOOK: Input: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_0@s=bob king +POSTHOOK: Input: default@over1k_part4_0@s=bob laertes +POSTHOOK: Input: 
default@over1k_part4_0@s=bob miller +POSTHOOK: Input: default@over1k_part4_0@s=bob ovid +POSTHOOK: Input: default@over1k_part4_0@s=bob polk +POSTHOOK: Input: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_0@s=bob van buren +POSTHOOK: Input: default@over1k_part4_0@s=bob white +POSTHOOK: Input: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_0@s=bob young +POSTHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +41 +PREHOOK: query: select count(1) from over1k_part4_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_1 +PREHOOK: Input: default@over1k_part4_1@s=bob brown +PREHOOK: Input: default@over1k_part4_1@s=bob carson +PREHOOK: Input: default@over1k_part4_1@s=bob davidson +PREHOOK: Input: default@over1k_part4_1@s=bob ellison +PREHOOK: Input: default@over1k_part4_1@s=bob falkner +PREHOOK: Input: default@over1k_part4_1@s=bob garcia +PREHOOK: Input: default@over1k_part4_1@s=bob hernandez +PREHOOK: Input: default@over1k_part4_1@s=bob ichabod +PREHOOK: Input: default@over1k_part4_1@s=bob king +PREHOOK: Input: default@over1k_part4_1@s=bob laertes +PREHOOK: Input: default@over1k_part4_1@s=bob miller +PREHOOK: Input: default@over1k_part4_1@s=bob ovid +PREHOOK: Input: default@over1k_part4_1@s=bob polk +PREHOOK: Input: default@over1k_part4_1@s=bob quirinius +PREHOOK: Input: default@over1k_part4_1@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_1@s=bob van buren +PREHOOK: Input: default@over1k_part4_1@s=bob white +PREHOOK: Input: default@over1k_part4_1@s=bob xylophone +PREHOOK: Input: default@over1k_part4_1@s=bob young +PREHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_1 +POSTHOOK: Input: default@over1k_part4_1@s=bob brown +POSTHOOK: Input: 
default@over1k_part4_1@s=bob carson +POSTHOOK: Input: default@over1k_part4_1@s=bob davidson +POSTHOOK: Input: default@over1k_part4_1@s=bob ellison +POSTHOOK: Input: default@over1k_part4_1@s=bob falkner +POSTHOOK: Input: default@over1k_part4_1@s=bob garcia +POSTHOOK: Input: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_1@s=bob king +POSTHOOK: Input: default@over1k_part4_1@s=bob laertes +POSTHOOK: Input: default@over1k_part4_1@s=bob miller +POSTHOOK: Input: default@over1k_part4_1@s=bob ovid +POSTHOOK: Input: default@over1k_part4_1@s=bob polk +POSTHOOK: Input: default@over1k_part4_1@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_1@s=bob van buren +POSTHOOK: Input: default@over1k_part4_1@s=bob white +POSTHOOK: Input: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_1@s=bob young +POSTHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +41 PREHOOK: query: drop table over1k_n3 PREHOOK: type: DROPTABLE PREHOOK: Input: default@over1k_n3 diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/reducesink_dedup.q.out index b89df52f96..4b1112cb2e 100644 --- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out +++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: select p_name +PREHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_name +POSTHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 POSTHOOK: type: QUERY @@ -36,3 +36,201 @@ almond antique medium spring khaki almond antique sky peru orange almond aquamarine dodger light 
gainsboro almond azure blanched chiffon midnight +PREHOOK: query: create temporary table d1 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d1 +POSTHOOK: query: create temporary table d1 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d1 +PREHOOK: query: create temporary table d2 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d2 +POSTHOOK: query: create temporary table d2 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d2 +PREHOOK: query: explain from (select key from src cluster by key) a +insert overwrite table d1 select a.key +insert overwrite table d2 select a.key cluster by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@d1 +PREHOOK: Output: default@d2 +POSTHOOK: query: explain from (select key from src cluster by key) a +insert overwrite table d1 select a.key +insert overwrite table d2 select a.key cluster by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@d1 +POSTHOOK: Output: default@d2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + Select Operator + expressions: _col0 (type: int) + 
outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +