diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
index 732a5aa..ab7b6e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
@@ -167,7 +167,7 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
     ObjectInspector[] fieldObjectInspectors = initEvaluators(evals, 0, length, rowInspector);
     sois.addAll(Arrays.asList(fieldObjectInspectors));
 
-    if (evals.length > length) {
+    if (outputColNames.size() > length) {
       // union keys
       List<ObjectInspector> uois = new ArrayList<ObjectInspector>();
       for (List<Integer> distinctCols : distinctColIndices) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index c6ae55d..36d0c3b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -398,7 +398,7 @@ public final class PlanUtils {
       }
       unionTypes.add(TypeInfoFactory.getStructTypeInfo(names, types));
     }
-    if (cols.size() - length > 0) {
+    if (outputColumnNames.size() - length > 0) {
       schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(
           fieldPrefix + outputColumnNames.get(length),
           TypeInfoFactory.getUnionTypeInfo(unionTypes)));
@@ -545,9 +545,12 @@ public final class PlanUtils {
     ArrayList<String> outputKeyCols = new ArrayList<String>();
     ArrayList<String> outputValCols = new ArrayList<String>();
     if (includeKeyCols) {
-      keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnListWithLength(
-          keyCols, distinctColIndices, outputKeyColumnNames, numKeys, ""),
-          order);
+      List<FieldSchema> keySchema = getFieldSchemasFromColumnListWithLength(
+          keyCols, distinctColIndices, outputKeyColumnNames, numKeys, "");
+      if (order.length() < outputKeyColumnNames.size()) {
+        order = order + "+";
+      }
+      keyTable = getReduceKeyTableDesc(keySchema, order);
       outputKeyCols.addAll(outputKeyColumnNames);
     } else {
       keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList(
diff --git ql/src/test/queries/clientpositive/groupby_distinct_samekey.q ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
new file mode 100644
index 0000000..b64e9c4
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
@@ -0,0 +1,9 @@
+create table t1 (int1 int, int2 int, str1 string, str2 string);
+
+explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+explain select int1, sum(distinct int1) from t1 group by int1;
+
+select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1;
+select int1, sum(distinct int1) from t1 group by int1;
+
+drop table t1;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
new file mode 100644
index 0000000..139d715
--- /dev/null
+++ ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
@@ -0,0 +1,215 @@
+PREHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL int1))))) Q1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL Q1) int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (. (TOK_TABLE_OR_COL Q1) int1)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL Q1) int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        q1:t1
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+                    expr: int2
+                    type: int
+                    expr: str1
+                    type: string
+                    expr: str2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: int
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col2
+                      type: string
+                      expr: _col3
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+            outputColumnNames: _col0
+            Group By Operator
+              aggregations:
+                    expr: sum(DISTINCT _col0)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: int
+              mode: hash
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/navis/hive_2011-12-06_22-42-54_678_6880816387144416561/-mr-10002
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: int
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: int
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col1:0._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select int1, sum(distinct int1) from t1 group by int1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select int1, sum(distinct int1) from t1 group by int1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL int1)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL int1)))) (TOK_GROUPBY (TOK_TABLE_OR_COL int1))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1
+          TableScan
+            alias: t1
+            Select Operator
+              expressions:
+                    expr: int1
+                    type: int
+              outputColumnNames: int1
+              Group By Operator
+                aggregations:
+                      expr: sum(DISTINCT int1)
+                bucketGroup: false
+                keys:
+                      expr: int1
+                      type: int
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: int
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: int
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: sum(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col1:0._col0
+                type: int
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-06_22-42-55_007_7670241209892952007/-mr-10000
+POSTHOOK: query: select Q1.int1, sum(distinct Q1.int1) from (select * from t1 order by int1) Q1 group by Q1.int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-06_22-42-55_007_7670241209892952007/-mr-10000
+PREHOOK: query: select int1, sum(distinct int1) from t1 group by int1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: file:/tmp/navis/hive_2011-12-06_22-43-00_371_1806093835357284158/-mr-10000
+POSTHOOK: query: select int1, sum(distinct int1) from t1 group by int1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: file:/tmp/navis/hive_2011-12-06_22-43-00_371_1806093835357284158/-mr-10000
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
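
A note on the PlanUtils.java hunk above: when getFieldSchemasFromColumnListWithLength appends the union key column that carries the DISTINCT expression, the ascending/descending order string handed to getReduceKeyTableDesc can be one character shorter than the number of output key columns, so the change pads it with a single "+". The standalone Java sketch below restates only that padding step; the class and method names (SortOrderPaddingSketch, padSortOrder) are illustrative and not part of the patch.

// Illustrative sketch only, not part of the patch: it mirrors the inline check
// added to PlanUtils.getReduceSinkDesc() in the hunk above.
public final class SortOrderPaddingSketch {

  // Hypothetical helper: pad the ReduceSink order string with one ascending
  // marker when the key schema gained an extra (union/distinct) key column.
  static String padSortOrder(String order, int numOutputKeyColumns) {
    if (order.length() < numOutputKeyColumns) {
      order = order + "+"; // '+' denotes ascending in Hive's sort-order string
    }
    return order;
  }

  public static void main(String[] args) {
    System.out.println(padSortOrder("+", 2));  // "++", matching "sort order: ++" in the .q.out
    System.out.println(padSortOrder("++", 2)); // already consistent, returned unchanged
  }
}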