diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 3c7f0b78c2..71ee25d9e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -65,6 +67,11 @@ public KeyWrapper getKeyWrapper() {
 class ListKeyWrapper extends KeyWrapper {
   int hashcode = -1;
   Object[] keys;
+  @Override
+  public String toString() {
+    return "ListKeyWrapper [keys=" + Arrays.toString(keys) + "]";
+  }
+
   // decide whether this is already in hashmap (keys in hashmap are deepcopied
   // version, and we need to use 'currentKeyObjectInspector').
   ListObjectsEqualComparer equalComparer;
@@ -165,6 +172,11 @@ private void deepCopyElements(Object[] keys,
   transient StringObjectInspector soi_new, soi_copy;
 
 class TextKeyWrapper extends KeyWrapper {
+  @Override
+  public String toString() {
+    return "TextKeyWrapper [key=" + key + "]";
+  }
+
   int hashcode;
   Object key;
   boolean isCopy;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 29f3579fee..16d7c519fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -90,6 +90,11 @@ protected static class MapOpCtx {
 
+    @Override
+    public String toString() {
+      return "[alias=" + alias + ", op=" + op + "]";
+    }
+
     final String alias;
     final Operator<?> op;
     final PartitionDesc partDesc;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7ff7e189ea..76332a8581 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10228,38 +10228,35 @@ private Operator genSelectAllDesc(Operator input) throws SemanticException {
         }
 
         if (distinctKeys.isEmpty()) {
-          // current dest has no distinct keys.
+          // GBY without distinct keys is not prepared to process distinct key structured rows.
+          // Create another branch in the common code.
+          continue;
+        }
+        if (distinctKeyLists.get(i).isEmpty()) {
           List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-          combineExprNodeLists(sprayKeyLists.get(i), distinctKeyLists.get(i), combinedList);
-          if (!matchExprLists(combinedList, sprayKeys)) {
+          combineExprNodeLists(sprayKeys, distinctKeys, combinedList);
+          if (!matchExprLists(combinedList, sprayKeyLists.get(i))) {
             continue;
-          } // else do the common code at the end.
-        } else {
-          if (distinctKeyLists.get(i).isEmpty()) {
-            List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-            combineExprNodeLists(sprayKeys, distinctKeys, combinedList);
-            if (!matchExprLists(combinedList, sprayKeyLists.get(i))) {
-              continue;
-            } else {
-              // we have found a match. insert this distinct clause to head.
-              distinctKeyLists.remove(i);
-              sprayKeyLists.remove(i);
-              distinctKeyLists.add(i, distinctKeys);
-              sprayKeyLists.add(i, sprayKeys);
-              commonGroupByDestGroups.get(i).add(0, dest);
-              found = true;
-              break;
-            }
           } else {
-            if (!matchExprLists(distinctKeyLists.get(i), distinctKeys)) {
-              continue;
-            }
+            // we have found a match. insert this distinct clause to head.
+            distinctKeyLists.remove(i);
+            sprayKeyLists.remove(i);
+            distinctKeyLists.add(i, distinctKeys);
+            sprayKeyLists.add(i, sprayKeys);
+            commonGroupByDestGroups.get(i).add(0, dest);
+            found = true;
+            break;
+          }
+        } else {
-            if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) {
-              continue;
-            }
-            // else do common code
+          if (!matchExprLists(distinctKeyLists.get(i), distinctKeys)) {
+            continue;
+          }
+
+          if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) {
+            continue;
           }
+          // else do common code
         }
 
         // common code
diff --git ql/src/test/queries/clientpositive/multi_insert_distinct.q ql/src/test/queries/clientpositive/multi_insert_distinct.q
new file mode 100644
index 0000000000..ac3e5ee883
--- /dev/null
+++ ql/src/test/queries/clientpositive/multi_insert_distinct.q
@@ -0,0 +1,66 @@
+--! qt:dataset:src
+
+CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+;
+
+INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a');
+
+
+CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+
+
+set hive.explain.user=false;
+set hive.stats.autogather=false;
+set hive.stats.column.autogather=false;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+truncate table tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+select * from tmp_grouped_by_all_col;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/multi_insert_distinct.q.out ql/src/test/results/clientpositive/multi_insert_distinct.q.out
new file mode 100644
index 0000000000..e86711ad6c
--- /dev/null
+++ ql/src/test/results/clientpositive/multi_insert_distinct.q.out
@@ -0,0 +1,534 @@
+PREHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '\t'
+LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp1
+PREHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tmp1
+POSTHOOK: Lineage: tmp1.v1 SCRIPT []
+POSTHOOK: Lineage: tmp1.v2 SCRIPT []
+POSTHOOK: Lineage: tmp1.v3 SCRIPT []
+PREHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col2:0._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: truncate table tmp_grouped_by_two_col
+PREHOOK: type: TRUNCATETABLE
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: truncate table tmp_grouped_by_two_col
+POSTHOOK: type: TRUNCATETABLE
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(v3)
+                keys: v1 (type: string), v2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.tmp_grouped_by_all_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_all_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v3 SIMPLE [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_all_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_all_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+v1	v2	v3
+v1	v2	v3a