Index: data/files/primitive_type_arrays.txt =================================================================== --- data/files/primitive_type_arrays.txt (revision 0) +++ data/files/primitive_type_arrays.txt (revision 0) @@ -0,0 +1 @@ +54321987216432168410001003572461truefalse3.1411.6182.7181.4143.141591.618032.718281.41421portosathosaramis1970-01-16 12:50:35.2421970-01-07 00:54:54.4421970-01-05 13:51:04.042 Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (revision 1351652) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java (working copy) @@ -20,6 +20,8 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; +import java.util.List; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -29,13 +31,11 @@ import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; /** * Generic UDF for array sort @@ -52,7 +52,7 @@ + " 'a', 'b', 'c', 'd'") public class GenericUDFSortArray extends GenericUDF { private Converter[] converters; - private ArrayList ret = new ArrayList(); + private final List ret = new ArrayList(); private ObjectInspector[] argumentOIs; @Override @@ -68,8 +68,9 @@ switch(arguments[0].getCategory()) { case LIST: if(((ListObjectInspector)(arguments[0])).getListElementObjectInspector() - .getCategory().equals(Category.PRIMITIVE)) + .getCategory().equals(Category.PRIMITIVE)) { break; + } default: throw new UDFArgumentTypeException(0, "Argument 1" + " of function SORT_ARRAY must be " + Constants.LIST_TYPE_NAME @@ -98,9 +99,16 @@ Object array = arguments[0].get(); ListObjectInspector arrayOI = (ListObjectInspector) argumentOIs[0]; - ArrayList retArray = (ArrayList) arrayOI.getList(array); - Collections.sort(retArray); + List retArray = (List) arrayOI.getList(array); + final ObjectInspector valInspector = arrayOI.getListElementObjectInspector(); + Collections.sort(retArray, new Comparator() { + @Override + public int compare(Object o1, Object o2) { + return ObjectInspectorUtils.compare(o1, valInspector, o2, valInspector); + } + }); + ret.clear(); for (int i = 0; i < retArray.size(); i++) { ret.add(converters[0].convert(retArray.get(i))); Index: ql/src/test/queries/clientpositive/udf_sort_array.q =================================================================== --- ql/src/test/queries/clientpositive/udf_sort_array.q (revision 1351652) +++ ql/src/test/queries/clientpositive/udf_sort_array.q (working copy) @@ -17,3 +17,22 @@ -- Evaluate function against FLOAT valued keys SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1; +-- Test it against data in a table. 
+CREATE TABLE dest1 ( + tinyints ARRAY<TINYINT>, + smallints ARRAY<SMALLINT>, + ints ARRAY<INT>, + bigints ARRAY<BIGINT>, + booleans ARRAY<BOOLEAN>, + floats ARRAY<FLOAT>, + doubles ARRAY<DOUBLE>, + strings ARRAY<STRING>, + timestamps ARRAY<TIMESTAMP> +) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../data/files/primitive_type_arrays.txt' OVERWRITE INTO TABLE dest1; + +SELECT sort_array(tinyints), sort_array(smallints), sort_array(ints), + sort_array(bigints), sort_array(booleans), sort_array(floats), + sort_array(doubles), sort_array(strings), sort_array(timestamps) + FROM dest1; Index: ql/src/test/results/clientpositive/udf_sort_array.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_sort_array.q.out (revision 1351652) +++ ql/src/test/results/clientpositive/udf_sort_array.q.out (working copy) @@ -99,3 +99,51 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### [-3.445,0.777,1.0,1.325,2.003,2.333,9.0] +PREHOOK: query: -- Test it against data in a table. +CREATE TABLE dest1 ( + tinyints ARRAY<TINYINT>, + smallints ARRAY<SMALLINT>, + ints ARRAY<INT>, + bigints ARRAY<BIGINT>, + booleans ARRAY<BOOLEAN>, + floats ARRAY<FLOAT>, + doubles ARRAY<DOUBLE>, + strings ARRAY<STRING>, + timestamps ARRAY<TIMESTAMP> +) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test it against data in a table. 
+CREATE TABLE dest1 ( + tinyints ARRAY<TINYINT>, + smallints ARRAY<SMALLINT>, + ints ARRAY<INT>, + bigints ARRAY<BIGINT>, + booleans ARRAY<BOOLEAN>, + floats ARRAY<FLOAT>, + doubles ARRAY<DOUBLE>, + strings ARRAY<STRING>, + timestamps ARRAY<TIMESTAMP> +) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/primitive_type_arrays.txt' OVERWRITE INTO TABLE dest1 +PREHOOK: type: LOAD +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/primitive_type_arrays.txt' OVERWRITE INTO TABLE dest1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dest1 +PREHOOK: query: SELECT sort_array(tinyints), sort_array(smallints), sort_array(ints), + sort_array(bigints), sort_array(booleans), sort_array(floats), + sort_array(doubles), sort_array(strings), sort_array(timestamps) + FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sort_array(tinyints), sort_array(smallints), sort_array(ints), + sort_array(bigints), sort_array(booleans), sort_array(floats), + sort_array(doubles), sort_array(strings), sort_array(timestamps) + FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +[1,2,3,4,5] [1,2,7,8,9] [4,8,16,32,64] [1,100,246,357,1000] [false,true] [1.414,1.618,2.718,3.141] [1.41421,1.61803,2.71828,3.14159] ["","aramis","athos","portos"] ["1970-01-05 13:51:04.042","1970-01-07 00:54:54.442","1970-01-16 12:50:35.242"]