diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 3c7f0b78c2..71ee25d9e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -65,6 +67,11 @@ public KeyWrapper getKeyWrapper() {
   class ListKeyWrapper extends KeyWrapper {
     int hashcode = -1;
     Object[] keys;
+    @Override
+    public String toString() {
+      return "ListKeyWrapper [keys=" + Arrays.toString(keys) + "]";
+    }
+
     // decide whether this is already in hashmap (keys in hashmap are deepcopied
     // version, and we need to use 'currentKeyObjectInspector').
     ListObjectsEqualComparer equalComparer;
@@ -165,6 +172,11 @@ private void deepCopyElements(Object[] keys,
   transient StringObjectInspector soi_new, soi_copy;
 
   class TextKeyWrapper extends KeyWrapper {
+    @Override
+    public String toString() {
+      return "TextKeyWrapper [key=" + key + "]";
+    }
+
     int hashcode;
     Object key;
     boolean isCopy;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 29f3579fee..16d7c519fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -90,6 +90,11 @@ protected static class MapOpCtx {
 
+    @Override
+    public String toString() {
+      return "[alias=" + alias + ", op=" + op + "]";
+    }
+
     final String alias;
     final Operator<?> op;
     final PartitionDesc partDesc;
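For context: the toString() overrides above are debugging aids only. They make grouping keys and map contexts print as readable values in logs and debugger watches instead of identity hashes. A minimal standalone sketch of the same pattern (class and method names here are illustrative, not the Hive classes):

```java
import java.util.Arrays;

// Illustrative only, not Hive code: an Object[] field prints as an
// identity hash by default, while Arrays.toString renders the values.
public class KeyWrapperToStringDemo {

  static final class DemoKeyWrapper {
    private final Object[] keys;

    DemoKeyWrapper(Object... keys) {
      this.keys = keys;
    }

    @Override
    public String toString() {
      // Same formatting the patch adds to ListKeyWrapper.
      return "ListKeyWrapper [keys=" + Arrays.toString(keys) + "]";
    }
  }

  public static void main(String[] args) {
    // Prints: ListKeyWrapper [keys=[v1, v2, 3]]
    System.out.println(new DemoKeyWrapper("v1", "v2", 3L));
  }
}
```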
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d5ed581861..fabff87cdc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10206,62 +10206,62 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
     List<Operator<? extends OperatorDesc>> inputOperators =
         new ArrayList<Operator<? extends OperatorDesc>>(ks.size());
-    List<List<ExprNodeDesc>> sprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
-    List<List<ExprNodeDesc>> distinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    // We will try to combine multiple clauses into a smaller number with compatible keys.
+    List<List<ExprNodeDesc>> newSprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    List<List<ExprNodeDesc>> newDistinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
 
     // Iterate over each clause
     for (String dest : ks) {
       Operator input = inputs.get(dest);
       RowResolver inputRR = opParseCtx.get(input).getRowResolver();
 
-      List<ExprNodeDesc> distinctKeys = getDistinctExprs(qbp, dest, inputRR);
-      List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+      // Determine the keys for the current clause.
+      List<ExprNodeDesc> currentDistinctKeys = getDistinctExprs(qbp, dest, inputRR);
+      List<ExprNodeDesc> currentSprayKeys = determineSprayKeys(qbp, dest, inputRR);
 
-      // Add the group by expressions
-      List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
-      for (ASTNode grpByExpr : grpByExprs) {
-        ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
-        if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
-          sprayKeys.add(exprDesc);
-        }
-      }
-
-      // Loop through each of the lists of exprs, looking for a match
+      // Loop through each of the lists of exprs, looking for a match.
       boolean found = false;
-      for (int i = 0; i < sprayKeyLists.size(); i++) {
+      for (int i = 0; i < newSprayKeyLists.size(); i++) {
         if (!input.equals(inputOperators.get(i))) {
           continue;
         }
+        // We will try to merge this clause into one of the previously added ones.
+        List<ExprNodeDesc> targetSprayKeys = newSprayKeyLists.get(i);
+        List<ExprNodeDesc> targetDistinctKeys = newDistinctKeyLists.get(i);
+        if (currentDistinctKeys.isEmpty() != targetDistinctKeys.isEmpty()) {
+          // GBY without distinct keys is not prepared to process distinct-key structured rows.
+          continue;
+        }
 
-        if (distinctKeys.isEmpty()) {
+        if (currentDistinctKeys.isEmpty()) {
           // current dest has no distinct keys.
           List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-          combineExprNodeLists(sprayKeyLists.get(i), distinctKeyLists.get(i), combinedList);
-          if (!matchExprLists(combinedList, sprayKeys)) {
+          combineExprNodeLists(targetSprayKeys, targetDistinctKeys, combinedList);
+          if (!matchExprLists(combinedList, currentSprayKeys)) {
             continue;
           } // else do the common code at the end.
         } else {
-          if (distinctKeyLists.get(i).isEmpty()) {
+          if (targetDistinctKeys.isEmpty()) {
             List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-            combineExprNodeLists(sprayKeys, distinctKeys, combinedList);
-            if (!matchExprLists(combinedList, sprayKeyLists.get(i))) {
+            combineExprNodeLists(currentSprayKeys, currentDistinctKeys, combinedList);
+            if (!matchExprLists(combinedList, targetSprayKeys)) {
               continue;
             } else {
               // we have found a match. insert this distinct clause to head.
-              distinctKeyLists.remove(i);
-              sprayKeyLists.remove(i);
-              distinctKeyLists.add(i, distinctKeys);
-              sprayKeyLists.add(i, sprayKeys);
+              newDistinctKeyLists.remove(i);
+              newSprayKeyLists.remove(i);
+              newDistinctKeyLists.add(i, currentDistinctKeys);
+              newSprayKeyLists.add(i, currentSprayKeys);
               commonGroupByDestGroups.get(i).add(0, dest);
               found = true;
               break;
             }
           } else {
-            if (!matchExprLists(distinctKeyLists.get(i), distinctKeys)) {
+            if (!matchExprLists(targetDistinctKeys, currentDistinctKeys)) {
               continue;
             }
 
-            if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) {
+            if (!matchExprLists(targetSprayKeys, currentSprayKeys)) {
              continue;
             }
             // else do common code
@@ -10278,8 +10278,8 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
       // No match was found, so create new entries
       if (!found) {
         inputOperators.add(input);
-        sprayKeyLists.add(sprayKeys);
-        distinctKeyLists.add(distinctKeys);
+        newSprayKeyLists.add(currentSprayKeys);
+        newDistinctKeyLists.add(currentDistinctKeys);
         List<String> destGroup = new ArrayList<String>();
         destGroup.add(dest);
         commonGroupByDestGroups.add(destGroup);
@@ -10289,6 +10289,21 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
     return commonGroupByDestGroups;
   }
 
+  protected List<ExprNodeDesc> determineSprayKeys(QBParseInfo qbp, String dest,
+      RowResolver inputRR) throws SemanticException {
+    List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+
+    // Add the group by expressions
+    List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
+    for (ASTNode grpByExpr : grpByExprs) {
+      ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
+      if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
+        sprayKeys.add(exprDesc);
+      }
+    }
+    return sprayKeys;
+  }
+
   private void combineExprNodeLists(List<ExprNodeDesc> list, List<ExprNodeDesc> list2,
       List<ExprNodeDesc> combinedList) {
     combinedList.addAll(list);
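The SemanticAnalyzer hunk is the functional fix. Clauses of a multi-insert are merged into one common reduce sink only when their keys are compatible; the new guard additionally requires both clauses to agree on whether DISTINCT keys are present, because a GROUP BY operator planned without distinct keys cannot consume rows shaped for distinct aggregation. A simplified, self-contained model of the merge rule follows (names are illustrative stand-ins for the Hive types, not the real classes; requires Java 16+ for records):

```java
import java.util.List;

// Simplified model of the rule in getCommonGroupByDestGroups: two
// destinations may share one reduce sink only if they agree on whether
// DISTINCT keys exist, and their key lists match.
public class ClauseGroupingSketch {

  record Clause(String dest, List<String> sprayKeys, List<String> distinctKeys) {}

  static boolean canShareReduceSink(Clause a, Clause b) {
    if (a.distinctKeys().isEmpty() != b.distinctKeys().isEmpty()) {
      // The guard the patch adds: a GBY planned without distinct keys
      // cannot process distinct-key structured rows.
      return false;
    }
    return a.sprayKeys().equals(b.sprayKeys())
        && a.distinctKeys().equals(b.distinctKeys());
  }

  public static void main(String[] args) {
    // Mirrors the third test query below: GROUP BY v1 with two DISTINCT
    // aggregates vs. plain GROUP BY v1, v2, v3.
    Clause withDistinct = new Clause("tmp_grouped_by_one_col",
        List.of("v1"), List.of("v2", "v3"));
    Clause withoutDistinct = new Clause("tmp_grouped_by_all_col",
        List.of("v1", "v2", "v3"), List.of());

    // Previously, combined-key matching could merge such clauses and feed
    // mis-shaped rows to one GROUP BY; the guard now keeps them apart.
    System.out.println(canShareReduceSink(withDistinct, withoutDistinct)); // false
  }
}
```

This stricter rule is also why the golden-file updates below compile the distinct and non-distinct aggregations into separate map-reduce stages (the renumbered and newly added Stage-5/Stage-6/Stage-7 entries in multi_insert_gby3.q.out).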
diff --git ql/src/test/queries/clientpositive/multi_insert_distinct.q ql/src/test/queries/clientpositive/multi_insert_distinct.q
new file mode 100644
index 0000000000..ac3e5ee883
--- /dev/null
+++ ql/src/test/queries/clientpositive/multi_insert_distinct.q
@@ -0,0 +1,66 @@
+--! qt:dataset:src
+
+CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+;
+
+INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a');
+
+
+CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+
+
+set hive.explain.user=false;
+set hive.stats.autogather=false;
+set hive.stats.column.autogather=false;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+truncate table tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+select * from tmp_grouped_by_all_col;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/multi_insert_distinct.q.out ql/src/test/results/clientpositive/multi_insert_distinct.q.out
new file mode 100644
index 0000000000..e86711ad6c
--- /dev/null
+++ ql/src/test/results/clientpositive/multi_insert_distinct.q.out
@@ -0,0 +1,534 @@
+PREHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp1
+PREHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tmp1
+POSTHOOK: Lineage: tmp1.v1 SCRIPT []
+POSTHOOK: Lineage: tmp1.v2 SCRIPT []
+POSTHOOK: Lineage: tmp1.v3 SCRIPT []
+PREHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED FIELDS TERMINATED
BY '\t' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_grouped_by_all_col +POSTHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_grouped_by_all_col +PREHOOK: query: CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_grouped_by_one_col +POSTHOOK: query: CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_grouped_by_one_col +PREHOOK: query: CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: query: CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_grouped_by_two_col +PREHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(distinct v3) GROUP BY v1, v2 +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(distinct v3) GROUP BY v1, v2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tmp1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT v2), count(DISTINCT v3) + keys: v1 (type: string), v2 (type: string), v3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT v3) + keys: v1 (type: string), v2 (type: string), v3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 17 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_two_col + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_two_col + +PREHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(distinct v3) GROUP BY v1, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Output: default@tmp_grouped_by_one_col +PREHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP 
BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(distinct v3) GROUP BY v1, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Output: default@tmp_grouped_by_one_col +POSTHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +PREHOOK: query: select * from tmp_grouped_by_two_col +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_grouped_by_two_col +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp_grouped_by_two_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_grouped_by_two_col +#### A masked pattern was here #### +v1 v2 2 +PREHOOK: query: truncate table tmp_grouped_by_two_col +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: query: truncate table tmp_grouped_by_two_col +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@tmp_grouped_by_two_col +PREHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(v3) GROUP BY v1, v2 +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(v3) GROUP BY v1, v2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tmp1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT v2), count(DISTINCT v3) + keys: v1 (type: string), v2 (type: string), v3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(v3) + keys: v1 (type: string), v2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_two_col + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_two_col + +PREHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_two_col +SELECT v1, v2, count(v3) GROUP BY v1, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Output: default@tmp_grouped_by_one_col +PREHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO 
tmp_grouped_by_two_col +SELECT v1, v2, count(v3) GROUP BY v1, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Output: default@tmp_grouped_by_one_col +POSTHOOK: Output: default@tmp_grouped_by_two_col +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +PREHOOK: query: select * from tmp_grouped_by_two_col +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_grouped_by_two_col +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp_grouped_by_two_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_grouped_by_two_col +#### A masked pattern was here #### +v1 v2 2 +PREHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_all_col +SELECT v1, v2, v3 GROUP BY v1, v2, v3 +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_all_col +SELECT v1, v2, v3 GROUP BY v1, v2, v3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: tmp1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT v2), count(DISTINCT v3) + keys: v1 (type: string), v2 (type: string), v3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v1 (type: string), v2 (type: string), v3 (type: string) + outputColumnNames: v1, v2, v3 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: v1 (type: string), v2 (type: string), v3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + 
aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_one_col + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_all_col + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmp_grouped_by_all_col + +PREHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_all_col +SELECT v1, v2, v3 GROUP BY v1, v2, v3 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Output: default@tmp_grouped_by_all_col +PREHOOK: Output: default@tmp_grouped_by_one_col +POSTHOOK: query: FROM tmp1 +INSERT INTO tmp_grouped_by_one_col +SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1 +INSERT INTO tmp_grouped_by_all_col +SELECT v1, v2, v3 GROUP BY v1, v2, v3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Output: default@tmp_grouped_by_all_col +POSTHOOK: Output: default@tmp_grouped_by_one_col +POSTHOOK: Lineage: tmp_grouped_by_all_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_all_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_all_col.v3 SIMPLE [(tmp1)tmp1.FieldSchema(name:v3, type:string, 
comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ] +POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ] +PREHOOK: query: select * from tmp_grouped_by_all_col +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_grouped_by_all_col +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp_grouped_by_all_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_grouped_by_all_col +#### A masked pattern was here #### +v1 v2 v3 +v1 v2 v3a diff --git ql/src/test/results/clientpositive/multi_insert_gby3.q.out ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 6c75853ef7..677d9d479f 100644 --- ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -39,11 +39,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 - Stage-1 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -56,82 +57,67 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: vectorized + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + Group By Operator + aggregations: count(DISTINCT 
KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: key, keyd - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: key, keyd, value - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 
Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,7 +143,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: vectorized Reduce Operator Tree: @@ -165,16 +151,16 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-6 Stats Work Basic Stats Work: Column Stats Desc: @@ -182,6 +168,52 @@ STAGE PLANS: Column Types: string, double, string Table: default.e2_n3 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Move Operator tables: @@ -192,7 +224,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2_n3 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -232,11 +264,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 - Stage-1 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -249,82 +282,68 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: vectorized - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: key, keyd - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: key, keyd, value - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -334,15 +353,15 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 + name: default.e2_n3 Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: - Columns: key, keyd - Column Types: string, double - Table: default.e1_n2 + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2_n3 Stage: Stage-4 Map Reduce @@ -350,30 +369,75 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-6 Stats Work Basic Stats Work: Column Stats Desc: - Columns: key, keyd, value - Column Types: string, double, string - Table: default.e2_n3 + Columns: key, keyd + Column Types: string, double + Table: default.e1_n2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -383,26 +447,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 + name: default.e1_n2 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1967,15 +2031,16 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-10 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-8, Stage-11 Stage-5 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-10 - Stage-9 depends on stages: Stage-2, Stage-5, Stage-7, Stage-10 - Stage-1 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-3 - Stage-8 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-8 - Stage-10 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1, Stage-5, Stage-8, Stage-11 + Stage-10 depends on stages: Stage-2, Stage-5, Stage-8, Stage-11 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-6 + Stage-9 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-9 STAGE PLANS: Stage: Stage-3 @@ -1988,12 +2053,33 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT _col1) keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) @@ -2007,74 +2093,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: key, keyd - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: key, keyd, value - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -2100,7 +2151,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: vectorized Reduce Operator Tree: @@ -2108,16 +2159,16 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-7 Stats Work Basic Stats Work: Column Stats Desc: @@ -2125,7 +2176,7 @@ STAGE PLANS: Column Types: string, double, string Table: default.e2_n3 - Stage: Stage-9 + Stage: Stage-10 Stats Work Basic Stats Work: Column Stats Desc: @@ -2133,6 +2184,52 @@ STAGE PLANS: Column Types: string, double Table: default.e3 + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort 
order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 Move Operator tables: @@ -2143,7 +2240,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2_n3 - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -2166,7 +2263,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -2221,7 +2318,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index 9c4cdec0c4..113ff46fc0 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -47,10 +47,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -59,53 +60,84 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: 
string), _col2 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Execution mode: vectorized Reducer 2 Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 
2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 Stage: Stage-0 Move Operator @@ -160,10 +192,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -172,53 +205,84 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 + Execution mode: vectorized Reduce 
Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 Stage: Stage-0 Move Operator @@ -228,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 + name: default.e2_n3 Stage: Stage-3 Stats Work @@ -242,7 +306,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 + name: default.e1_n2 Stage: Stage-4 Stats Work @@ -1731,11 +1795,36 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (GROUP, 2) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col2) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: src @@ -1744,14 +1833,20 @@ STAGE PLANS: expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Execution mode: vectorized - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -1773,45 +1868,46 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, 
_col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1_n2 - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) - mode: complete + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1_n2 + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2_n3 - Reducer 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2_n3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col3:0._col0)
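The q.out hunks above all exercise the same planner change: a multi-insert query whose destinations need incompatible shuffles (one clause computes a DISTINCT aggregate, the other does not) is no longer funneled through a single reducer in which a Forward operator feeds two complete-mode Group By operators. Each destination now gets its own map-side hash Group By and its own mergepartial reducer (Reducer 2 / Reducer 3 in the Spark plans, the new Stage-6 in the MapReduce plan), and the Move/Stats stages are renumbered and re-paired with their tables accordingly. The two branches genuinely cannot share one shuffle: per the Reduce Output Operators above, the DISTINCT branch partitions on key alone while sorting on (key, value), whereas the SUM branch partitions on both key columns. The golden outputs are consistent with a test query of roughly the following shape; the exact text of multi_insert_gby3.q is not visible in this part of the patch, so this is a reconstruction for reference, not a quotation (table names e1_n2/e2_n3 and columns key, keyd, value are taken from the plans; the subquery and its alias x are hypothetical):

    -- Reconstructed sketch of the query behind these plans (an assumption,
    -- not the verbatim .q file). keyd is double, which is why the plans show
    -- UDFToDouble(key) on the map side and UDFToDouble(_col1) over the count.
    FROM (SELECT key, CAST(key AS double) AS keyd, value FROM src) x
    INSERT OVERWRITE TABLE e1_n2
      -- DISTINCT branch: map-side hash Group By keyed on (key, value),
      -- then its own mergepartial reducer computing count(DISTINCT value).
      SELECT key, COUNT(DISTINCT value) GROUP BY key
    INSERT OVERWRITE TABLE e2_n3
      -- Non-DISTINCT branch: map-side partial sum(keyd) keyed on (key, value),
      -- merged in a separate reducer; it no longer shares a shuffle (or a
      -- Forward-based reducer) with the DISTINCT branch above.
      SELECT key, SUM(keyd), value GROUP BY key, value;

Under this reading, each branch of the rewritten plan is equivalent to running its clause as a standalone aggregation, at the cost of scanning src once per branch instead of once overall, which is the trade the new plans above make in exchange for correct handling of the mixed DISTINCT/non-DISTINCT destinations.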