Index: ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (revision 980915) +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (working copy) @@ -163,8 +163,6 @@ } command.append(" GROUP BY "); command.append(indexCols + ", " + VirtualColumn.FILENAME.getName()); - command.append(" SORT BY "); - command.append(indexCols); Driver driver = new Driver(db.getConf()); driver.compile(command.toString()); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (revision 980915) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java (working copy) @@ -80,11 +80,22 @@ // The output of a partial aggregation is a list if (m == Mode.PARTIAL1) { inputOI = (PrimitiveObjectInspector) parameters[0]; - return ObjectInspectorFactory.getStandardListObjectInspector(inputOI); + return ObjectInspectorFactory + .getStandardListObjectInspector((PrimitiveObjectInspector) ObjectInspectorUtils + .getStandardObjectInspector(inputOI)); } else { - internalMergeOI = (StandardListObjectInspector) parameters[0]; - loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); - return loi; + if (!(parameters[0] instanceof StandardListObjectInspector)) { + //no map aggregation. + inputOI = (PrimitiveObjectInspector) ObjectInspectorUtils + .getStandardObjectInspector(parameters[0]); + return (StandardListObjectInspector) ObjectInspectorFactory + .getStandardListObjectInspector(inputOI); + } else { + internalMergeOI = (StandardListObjectInspector) parameters[0]; + inputOI = (PrimitiveObjectInspector) internalMergeOI.getListElementObjectInspector(); + loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); + return loi; + } } } Index: ql/src/test/queries/clientpositive/udaf_collect_set.q =================================================================== --- ql/src/test/queries/clientpositive/udaf_collect_set.q (revision 0) +++ ql/src/test/queries/clientpositive/udaf_collect_set.q (revision 0) @@ -0,0 +1,30 @@ +DESCRIBE FUNCTION collect_set; +DESCRIBE FUNCTION EXTENDED collect_set; + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20; + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20; + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20; + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20; Index: ql/src/test/results/clientpositive/udaf_collect_set.q.out =================================================================== --- ql/src/test/results/clientpositive/udaf_collect_set.q.out (revision 0) +++ ql/src/test/results/clientpositive/udaf_collect_set.q.out (revision 0) @@ -0,0 +1,138 @@ +PREHOOK: query: DESCRIBE FUNCTION collect_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION collect_set +POSTHOOK: type: DESCFUNCTION +collect_set(x) - Returns a set of objects with duplicate elements eliminated +PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set +POSTHOOK: type: DESCFUNCTION +collect_set(x) - Returns a set of objects with duplicate elements eliminated +PREHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000 +POSTHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-05-53_522_3530309455217069909/-mr-10000 +0 ["val_0"] +10 ["val_10"] +100 ["val_100"] +103 ["val_103"] +104 ["val_104"] +105 ["val_105"] +11 ["val_11"] +111 ["val_111"] +113 ["val_113"] +114 ["val_114"] +116 ["val_116"] +118 ["val_118"] +119 ["val_119"] +12 ["val_12"] +120 ["val_120"] +125 ["val_125"] +126 ["val_126"] +128 ["val_128"] +129 ["val_129"] +131 ["val_131"] +PREHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000 +POSTHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-01_685_2541220772134185861/-mr-10000 +0 ["val_0"] +10 ["val_10"] +100 ["val_100"] +103 ["val_103"] +104 ["val_104"] +105 ["val_105"] +11 ["val_11"] +111 ["val_111"] +113 ["val_113"] +114 ["val_114"] +116 ["val_116"] +118 ["val_118"] +119 ["val_119"] +12 ["val_12"] +120 ["val_120"] +125 ["val_125"] +126 ["val_126"] +128 ["val_128"] +129 ["val_129"] +131 ["val_131"] +PREHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000 +POSTHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-09_632_7957341289614848394/-mr-10000 +0 ["val_0"] +10 ["val_10"] +100 ["val_100"] +103 ["val_103"] +104 ["val_104"] +105 ["val_105"] +11 ["val_11"] +111 ["val_111"] +113 ["val_113"] +114 ["val_114"] +116 ["val_116"] +118 ["val_118"] +119 ["val_119"] +12 ["val_12"] +120 ["val_120"] +125 ["val_125"] +126 ["val_126"] +128 ["val_128"] +129 ["val_129"] +131 ["val_131"] +PREHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000 +POSTHOOK: query: SELECT key, collect_set(value) +FROM src +GROUP BY key ORDER BY key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-30_17-06-23_542_3742508270905277781/-mr-10000 +0 ["val_0"] +10 ["val_10"] +100 ["val_100"] +103 ["val_103"] +104 ["val_104"] +105 ["val_105"] +11 ["val_11"] +111 ["val_111"] +113 ["val_113"] +114 ["val_114"] +116 ["val_116"] +118 ["val_118"] +119 ["val_119"] +12 ["val_12"] +120 ["val_120"] +125 ["val_125"] +126 ["val_126"] +128 ["val_128"] +129 ["val_129"] +131 ["val_131"]