diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1ab914d..651246f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -683,6 +683,7 @@ beeline.positive.exclude=add_part_exist.q,\ udf_printf.q,\ udf_sentences.q,\ udf_sort_array.q,\ + udf_sort_array_by.q,\ udf_split.q,\ udf_struct.q,\ udf_substr.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 2f4a94c..59473ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -424,6 +424,7 @@ system.registerGenericUDF("elt", GenericUDFElt.class); system.registerGenericUDF("concat_ws", GenericUDFConcatWS.class); system.registerGenericUDF("sort_array", GenericUDFSortArray.class); + system.registerGenericUDF("sort_array_by", GenericUDFSortArrayByField.class); system.registerGenericUDF("array_contains", GenericUDFArrayContains.class); system.registerGenericUDF("sentences", GenericUDFSentences.class); system.registerGenericUDF("map_keys", GenericUDFMapKeys.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArrayByField.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArrayByField.java new file mode 100644 index 0000000..6a4d2a6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArrayByField.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; + + +/** + * Generic UDF for tuple array sort by desired field[s] with [ordering(ASC or DESC)] + * SORT_ARRAY_BY(array(obj1, obj2, obj3...),'f1','f2',..,['ASC','DESC']). + * + * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF + */ +@Description(name = "sort_array_by", value = "_FUNC_(array(obj1, obj2,...),'f1','f2',...,['ASC','DESC']) - " + + "Sorts the input tuple array in user specified order(ASC,DESC) by desired field[s] name" + + " If sorting order is not mentioned by user then dafault sorting order is ascending", + extended = "Example:\n" + + " > SELECT _FUNC_(array(struct('g',100),struct('b',200)),'col1','ASC') FROM src LIMIT 1;\n" + + " array(struct('b',200),struct('g',100)) ") +public class GenericUDFSortArrayByField extends GenericUDF { + private transient Converter[] converters; + private transient PrimitiveCategory[] inputTypes; + /**Output array results*/ + private final List ret = new ArrayList(); + private transient ListObjectInspector listObjectInspector; + private transient StructObjectInspector structObjectInspector; + /**All sorting fields*/ + private transient StructField[] fields; + /**Number of fields based on sorting will take place*/ + private transient int noOfInputFields; + + /**All possible ordering constants*/ + private enum SORT_ORDER_TYPE { + ASC, + DESC + }; + /**default sorting order*/ + private transient SORT_ORDER_TYPE sortOrder = SORT_ORDER_TYPE.ASC; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; + returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); + + /**This UDF requires minimum 2 arguments array_name,field name*/ + if (arguments.length < 2) { + throw new UDFArgumentLengthException("SORT_ARRAY_BY requires minimum 2 arguments, got " + arguments.length); + } + + /**First argument must be array*/ + switch (arguments[0].getCategory()) { + case LIST: + listObjectInspector = (ListObjectInspector) arguments[0]; + break; + default: + throw new UDFArgumentTypeException(0, "Argument 1 of function SORT_ARRAY_BY must be " + + serdeConstants.LIST_TYPE_NAME + ", but " + + arguments[0].getTypeName() + " was found."); + } + + /**Elements inside first argument(array) must be tuple(s)*/ + switch (listObjectInspector.getListElementObjectInspector().getCategory()) { + case STRUCT: + structObjectInspector = (StructObjectInspector) listObjectInspector.getListElementObjectInspector(); + break; + default: + throw new UDFArgumentTypeException(0, "Element[s] of first argument array in function SORT_ARRAY_BY must be " + + serdeConstants.STRUCT_TYPE_NAME + ", but " + listObjectInspector.getTypeName() + " was found."); + } + + /**All sort fields argument name and sort order name must be in String type*/ + converters = new Converter[arguments.length]; + inputTypes = new PrimitiveCategory[arguments.length]; + fields = new StructField[arguments.length - 1]; + noOfInputFields = arguments.length - 1; + for (int i = 1; i < arguments.length; i++) { + checkArgPrimitive(arguments, i); + checkArgGroups(arguments, i, inputTypes, PrimitiveGrouping.STRING_GROUP); + if (arguments[i] instanceof ConstantObjectInspector) { + String fieldName = getConstantStringValue(arguments, i); + /**checking whether any sorting order (ASC,DESC) has specified in last argument*/ + if (i != 1 + && (i == arguments.length - 1) + && (fieldName.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || fieldName.trim().toUpperCase() + .equals(SORT_ORDER_TYPE.DESC.name()))) { + sortOrder = SORT_ORDER_TYPE.valueOf(fieldName.trim().toUpperCase()); + noOfInputFields -= 1; + continue; + } + fields[i - 1] = structObjectInspector.getStructFieldRef(getConstantStringValue(arguments, i)); + } + obtainStringConverter(arguments, i, inputTypes, converters); + } + + ObjectInspector returnOI = returnOIResolver.get(structObjectInspector); + converters[0] = ObjectInspectorConverters.getConverter(structObjectInspector, returnOI); + return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (arguments[0].get() == null) { + return null; + } + + /**Except first argument all remaining are field names and [sorting order]*/ + + /**Add all non constant string tuple fields based on which sorting will happen with sorting ordering information if any.*/ + String field = null; + + /**If sorting order is set in initialize method then we are excluding last argument */ + for (int i = 0; i < noOfInputFields && fields[i] == null; i++) { + field = getStringValue(arguments, i + 1, converters); + if (i != 0 + && (i == arguments.length - 2) + && (field.trim().toUpperCase().equals(SORT_ORDER_TYPE.ASC.name()) || field.trim().toUpperCase() + .equals(SORT_ORDER_TYPE.DESC.name()))) { + noOfInputFields -= 1; + sortOrder = SORT_ORDER_TYPE.valueOf(field.trim().toUpperCase()); + continue; + } + fields[i] = structObjectInspector.getStructFieldRef(field); + } + + Object array = arguments[0].get(); + List retArray = (List) listObjectInspector.getList(array); + + /**Sort the tuple*/ + Collections.sort(retArray, new Comparator() { + + @Override + public int compare(Object object1, Object object2) { + int result = 0; + /**If multiple fields are mentioned for sorting a record then inside the loop we do will do sorting for each field*/ + for (int i = 0; i < noOfInputFields; i++) { + Object o1 = structObjectInspector.getStructFieldData(object1, fields[i]); + Object o2 = structObjectInspector.getStructFieldData(object2, fields[i]); + result = + ObjectInspectorUtils.compare(o1, fields[i].getFieldObjectInspector(), o2, + fields[i].getFieldObjectInspector()); + if (result != 0) { + /**Ordering*/ + if (sortOrder == SORT_ORDER_TYPE.DESC) { + result *= -1; + } + return result; + } + } + return result; + } + }); + + ret.clear(); + for (int i = 0; i < retArray.size(); i++) { + ret.add(converters[0].convert(retArray.get(i))); + } + return ret; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString("sort_array_by", children); + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArrayByField.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArrayByField.java new file mode 100644 index 0000000..d17c368 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFSortArrayByField.java @@ -0,0 +1,228 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import static java.util.Arrays.asList; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + + +public class TestGenericUDFSortArrayByField { + + private final GenericUDFSortArrayByField udf = new GenericUDFSortArrayByField(); + + @Test + public void testSortPrimitiveTupleOneField() throws HiveException { + + List tuple = new ArrayList(); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + tuple.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + ObjectInspector[] inputOIs = + { ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(asList("Company", "Salary"), tuple)), + PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector }; + + udf.initialize(inputOIs); + + Object i1 = asList(new Text("Facebook"), new DoubleWritable(80223.25)); + Object i2 = asList(new Text("Facebook"), new DoubleWritable(50223.25)); + Object i3 = asList(new Text("Facebook"), new DoubleWritable(40223.25)); + Object i4 = asList(new Text("Facebook"), new DoubleWritable(60223.25)); + + HiveVarchar vc = new HiveVarchar(); + vc.setValue("Salary"); + GenericUDF.DeferredJavaObject[] argas = + { new GenericUDF.DeferredJavaObject(asList(i1, i2, i3, i4)), new GenericUDF.DeferredJavaObject( + new HiveVarcharWritable(vc)) }; + + runAndVerify(argas, asList(i3, i2, i4, i1)); + } + + @Test + public void testSortPrimitiveTupleOneFieldOrderASC() throws HiveException { + + List tuple = new ArrayList(); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + tuple.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + ObjectInspector[] inputOIs = + { ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(asList("Company", "Salary"), tuple)), + PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector, + PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector }; + + udf.initialize(inputOIs); + + Object i1 = asList(new Text("Facebook"), new DoubleWritable(80223.25)); + Object i2 = asList(new Text("Facebook"), new DoubleWritable(50223.25)); + Object i3 = asList(new Text("Facebook"), new DoubleWritable(40223.25)); + Object i4 = asList(new Text("Facebook"), new DoubleWritable(60223.25)); + + HiveVarchar vc = new HiveVarchar(); + vc.setValue("Salary"); + HiveVarchar order = new HiveVarchar(); + order.setValue("ASC"); + GenericUDF.DeferredJavaObject[] argas = + { new GenericUDF.DeferredJavaObject(asList(i1, i2, i3, i4)), + new GenericUDF.DeferredJavaObject(new HiveVarcharWritable(vc)), + new GenericUDF.DeferredJavaObject(new HiveVarcharWritable(order)) + }; + + runAndVerify(argas, asList(i3, i2, i4, i1)); + } + + @Test + public void testSortPrimitiveTupleTwoField() throws HiveException { + + List tuple = new ArrayList(); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + + ObjectInspector[] inputOIs = + { ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(asList("Company", "Department"), tuple)), + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector }; + + udf.initialize(inputOIs); + + Object i1 = asList(new Text("Linkedin"), new Text("HR")); + Object i2 = asList(new Text("Linkedin"), new Text("IT")); + Object i3 = asList(new Text("Linkedin"), new Text("Finance")); + Object i4 = asList(new Text("Facebook"), new Text("IT")); + Object i5 = asList(new Text("Facebook"), new Text("Finance")); + Object i6 = asList(new Text("Facebook"), new Text("HR")); + Object i7 = asList(new Text("Google"), new Text("Logistics")); + Object i8 = asList(new Text("Google"), new Text("Finance")); + Object i9 = asList(new Text("Google"), new Text("HR")); + + HiveVarchar vc = new HiveVarchar(); + vc.setValue("Department"); + + GenericUDF.DeferredJavaObject[] argas = + { new GenericUDF.DeferredJavaObject(asList(i1, i2, i3, i4, i5, i6, i7, i8, i9)), + new GenericUDF.DeferredJavaObject( + new Text("Company")), new GenericUDF.DeferredJavaObject(new HiveVarcharWritable(vc)) }; + + runAndVerify(argas, asList(i5, i6, i4, i8, i9, i7, i3, i1, i2)); + } + + + @Test + public void testSortPrimitiveTupleTwoFieldOrderDESC() throws HiveException { + + List tuple = new ArrayList(); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + + ObjectInspector[] inputOIs = + { ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(asList("Company", "Department"), tuple)), + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector, + PrimitiveObjectInspectorFactory.writableStringObjectInspector}; + + udf.initialize(inputOIs); + + Object i1 = asList(new Text("Linkedin"), new Text("HR")); + Object i2 = asList(new Text("Linkedin"), new Text("IT")); + Object i3 = asList(new Text("Linkedin"), new Text("Finance")); + Object i4 = asList(new Text("Facebook"), new Text("IT")); + Object i5 = asList(new Text("Facebook"), new Text("Finance")); + Object i6 = asList(new Text("Facebook"), new Text("HR")); + Object i7 = asList(new Text("Google"), new Text("Logistics")); + Object i8 = asList(new Text("Google"), new Text("Finance")); + Object i9 = asList(new Text("Google"), new Text("HR")); + + HiveVarchar vc = new HiveVarchar(); + vc.setValue("Department"); + + GenericUDF.DeferredJavaObject[] argas = + { new GenericUDF.DeferredJavaObject(asList(i1, i2, i3, i4, i5, i6, i7, i8, i9)), + new GenericUDF.DeferredJavaObject(new Text("Company")), + new GenericUDF.DeferredJavaObject(new HiveVarcharWritable(vc)), + new GenericUDF.DeferredJavaObject(new Text("DESC"))}; + + runAndVerify(argas, asList(i2, i1, i3, i7, i9, i8, i4, i6, i5)); + } + + @Test + public void testSortTupleArrayStructOrderDESC() throws HiveException { + List tuple = new ArrayList(); + tuple.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + tuple.add(ObjectInspectorFactory + .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector)); + + ObjectInspector field = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + new Text("Scores")); + + ObjectInspector orderField = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + new Text("desc")); + + ObjectInspector[] inputOIs = + { ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory + .getStandardStructObjectInspector(asList("Student", "Scores"), tuple)), + field, + orderField + }; + + udf.initialize(inputOIs); + + Object i1 = + asList(new Text("Foo"), asList(new IntWritable(4), new IntWritable(3), new IntWritable(2), new IntWritable(1))); + + Object i2 = + asList(new Text("Boo"), asList(new IntWritable(2), new IntWritable(3), new IntWritable(2), new IntWritable(1))); + + Object i3 = + asList(new Text("Tom"), asList(new IntWritable(10), new IntWritable(3), new IntWritable(2), new IntWritable(1))); + + GenericUDF.DeferredJavaObject[] argas = + { new GenericUDF.DeferredJavaObject(asList(i1, i2, i3)), + new GenericUDF.DeferredJavaObject(field), + new GenericUDF.DeferredJavaObject(orderField), + }; + + runAndVerify(argas, asList(i3, i1, i2)); + } + + + private void runAndVerify(GenericUDF.DeferredJavaObject[] args, List expected) throws HiveException { + List result = (List) udf.evaluate(args); + Assert.assertEquals("Check size", expected.size(), result.size()); + Assert.assertArrayEquals("Check content", expected.toArray(), result.toArray()); + } +} diff --git ql/src/test/queries/clientnegative/udf_sort_array_by_wrong1.q ql/src/test/queries/clientnegative/udf_sort_array_by_wrong1.q new file mode 100644 index 0000000..9101c7e --- /dev/null +++ ql/src/test/queries/clientnegative/udf_sort_array_by_wrong1.q @@ -0,0 +1,2 @@ +-- invalid argument type for first argument +SELECT sort_array_by(array(2, 5, 4),'col1') FROM src LIMIT 1; \ No newline at end of file diff --git ql/src/test/queries/clientnegative/udf_sort_array_by_wrong2.q ql/src/test/queries/clientnegative/udf_sort_array_by_wrong2.q new file mode 100644 index 0000000..241d98e --- /dev/null +++ ql/src/test/queries/clientnegative/udf_sort_array_by_wrong2.q @@ -0,0 +1,2 @@ +-- invalid numbers of arguments +SELECT sort_array_by(array(struct(800 ,'Foo',28,80000) , struct(100,'Boo',21,70000))) FROM src LIMIT 1; \ No newline at end of file diff --git ql/src/test/queries/clientnegative/udf_sort_array_by_wrong3.q ql/src/test/queries/clientnegative/udf_sort_array_by_wrong3.q new file mode 100644 index 0000000..f2f046e --- /dev/null +++ ql/src/test/queries/clientnegative/udf_sort_array_by_wrong3.q @@ -0,0 +1,16 @@ +-- invalid field name in side struct + +DROP TABLE IF EXISTS sort_array_by_order_wrong; + +CREATE TABLE sort_array_by_order_wrong +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('name','Able' ,'salary',28), + named_struct('name','Boo' ,'salary',70000), + named_struct('name','Hary' ,'salary',50000) + ) as employee +; + +select company,sort_array_by(employee,'firstName') as col1 from sort_array_by_order_wrong ; diff --git ql/src/test/queries/clientpositive/udf_sort_array_by.q ql/src/test/queries/clientpositive/udf_sort_array_by.q new file mode 100644 index 0000000..4c8c878 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_sort_array_by.q @@ -0,0 +1,136 @@ +use default; +-- Test sort_array_by() UDF + +DESCRIBE FUNCTION sort_array_by; +DESCRIBE FUNCTION EXTENDED sort_array_by; + +DROP TABLE IF EXISTS sort_array_by_table; + +CREATE TABLE sort_array_by_table +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('empId',800 ,'name','Able' ,'age',28 ,'salary',80000), + named_struct('empId',756 ,'name','Able' ,'age',23 ,'salary',76889), + named_struct('empId',100 ,'name','Boo' ,'age',21 ,'salary',70000), + named_struct('empId',130 ,'name','Boo' ,'age',22 ,'salary',79000), + named_struct('empId',900 ,'name','Hary' ,'age',21 ,'salary',50000), + named_struct('empId',76 ,'name','Hary' ,'age',87 ,'salary',10000) + ) as employee, + "IN" as country +UNION ALL +SELECT "Facebook" as company, + array( + named_struct('empId',200 ,'name','Keiko' ,'age',28 ,'salary',80000), + named_struct('empId',206 ,'name','Keiko' ,'age',41 ,'salary',80500), + named_struct('empId',390 ,'name','Ben' ,'age',21 ,'salary',70000), + named_struct('empId',310 ,'name','Ben' ,'age',31 ,'salary',21000), + named_struct('empId',700 ,'name','Aron' ,'age',21 ,'salary',50000), + named_struct('empId',320 ,'name','Aron' ,'age',18 ,'salary',70000) + ) as employee, + "US" as country +UNION ALL +SELECT "Microsoft" as company, + array( + named_struct('empId',900 ,'name','Spiro' ,'age',28 ,'salary',80000), + named_struct('empId',300 ,'name','Spiro' ,'age',38 ,'salary',80300), + named_struct('empId',600 ,'name','James' ,'age',21 ,'salary',70000), + named_struct('empId',313 ,'name','James' ,'age',11 ,'salary',30000), + named_struct('empId',260 ,'name','Eden' ,'age',31 ,'salary',50020), + named_struct('empId',730 ,'name','Eden' ,'age',45 ,'salary',20300) + ) as employee, + "UK" as country +; + + +--Sort tuple array by field name:salary(single column) by default ascending order +select company,country,sort_array_by(employee,'salary') as single_field_sort from sort_array_by_table; + +--Sort tuple array by field name:salary(single column) by ascending order +select company,country,sort_array_by(employee,'salary','ASC') as single_field_sort from sort_array_by_table; + +--Sort tuple array by field name:salary(single column) by descending order +select company,country,sort_array_by(employee,'salary','desc') as single_field_sort from sort_array_by_table; + +--Above three in one query +select company,country, +sort_array_by(employee,'salary') as single_field_sort, +sort_array_by(employee,'salary','ASC') as single_field_sort_asc, +sort_array_by(employee,'salary','DESC') as single_field_sort_desc +from sort_array_by_table; + + +--Sort tuple array by field names : name,salary(multiple columns) by default ascending order +select company,country,sort_array_by(employee,'name','salary') as multiple_field_sort from sort_array_by_table; + +--Sort tuple array by field names : name,salary(multiple columns) ascending order +select company,country,sort_array_by(employee,'name','salary','asc') as multiple_field_sort from sort_array_by_table; + +--Sort tuple array by field names : name,salary(multiple columns) descending order +select company,country,sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort from sort_array_by_table; + + +--Above three in one query +select company,country, +sort_array_by(employee,'name','salary') as multiple_field_sort, +sort_array_by(employee,'name','salary','ASC') as multiple_field_sort_asc, +sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort_desc +from sort_array_by_table; + + +-- Test for order name ('ASC' and 'DESC') as tuple field names and and order name +DROP TABLE IF EXISTS sort_array_by_order_name; + +CREATE TABLE sort_array_by_order_name +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('asc','Able' ,'DESC','Keiko','salary',28), + named_struct('asc','Boo' ,'DESC','Aron','salary',70000), + named_struct('asc','Hary' ,'DESC','James' ,'salary',50000) + ) as employee ; + +-- select asc,desc as filed name with default sorting +select +company, +sort_array_by(employee,'asc') as col1, +sort_array_by(employee,'DESC') as col2 +from sort_array_by_order_name ; + +--select asc,desc as field name and explicitly provided sorting ordering. +--If argument length's size are more than two (first: tuple list,second: desired minimum a field name) +--then we always check whether the last argument is any sorting order name(ASC or DESC) +select +company, +sort_array_by(employee,'asc','ASC') as col1, +sort_array_by(employee,'DESC','desc') as col2 +from +sort_array_by_order_name ; + +-- similarity of sorting order check between this UDF and LATERAL VIEW explode(array). + +DROP TABLE IF EXISTS sort_array_by_table_order; + +CREATE TABLE sort_array_by_table_order +STORED AS TEXTFILE +AS +SELECT array( + named_struct('name','Able' ,'age',28), + named_struct('name','Able' ,'age',23), + named_struct('name','Boo' ,'age',21), + named_struct('name','Boo' ,'age',22), + named_struct('name','Hary' ,'age',21), + named_struct('name','Hary' ,'age',87) + ) as a_struct_array +; + +SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC; + +SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name', 'DESC')) structTable AS a_struct; + +SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC,a_struct.age DESC ; + +SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name','age', 'DESC')) structTable AS a_struct ; + diff --git ql/src/test/results/beelinepositive/show_functions.q.out ql/src/test/results/beelinepositive/show_functions.q.out index 4f3ec40..8e06bf0 100644 --- ql/src/test/results/beelinepositive/show_functions.q.out +++ ql/src/test/results/beelinepositive/show_functions.q.out @@ -139,6 +139,7 @@ Saving all output to "!!{outputDirectory}!!/show_functions.q.raw". Enter "record 'sin' 'size' 'sort_array' +'sort_array_by' 'space' 'split' 'sqrt' diff --git ql/src/test/results/clientnegative/udf_sort_array_by_wrong1.q.out ql/src/test/results/clientnegative/udf_sort_array_by_wrong1.q.out new file mode 100644 index 0000000..c7794d3 --- /dev/null +++ ql/src/test/results/clientnegative/udf_sort_array_by_wrong1.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10016]: Line 2:21 Argument type mismatch '4': Element[s] of first argument array in function SORT_ARRAY_BY must be struct, but array was found. diff --git ql/src/test/results/clientnegative/udf_sort_array_by_wrong2.q.out ql/src/test/results/clientnegative/udf_sort_array_by_wrong2.q.out new file mode 100644 index 0000000..220ade5 --- /dev/null +++ ql/src/test/results/clientnegative/udf_sort_array_by_wrong2.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '70000': SORT_ARRAY_BY requires minimum 2 arguments, got 1 diff --git ql/src/test/results/clientnegative/udf_sort_array_by_wrong3.q.out ql/src/test/results/clientnegative/udf_sort_array_by_wrong3.q.out new file mode 100644 index 0000000..09b3594 --- /dev/null +++ ql/src/test/results/clientnegative/udf_sort_array_by_wrong3.q.out @@ -0,0 +1,37 @@ +PREHOOK: query: -- invalid field name in side struct + +DROP TABLE IF EXISTS sort_array_by_order_wrong +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- invalid field name in side struct + +DROP TABLE IF EXISTS sort_array_by_order_wrong +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sort_array_by_order_wrong +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('name','Able' ,'salary',28), + named_struct('name','Boo' ,'salary',70000), + named_struct('name','Hary' ,'salary',50000) + ) as employee +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@sort_array_by_order_wrong +POSTHOOK: query: CREATE TABLE sort_array_by_order_wrong +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('name','Able' ,'salary',28), + named_struct('name','Boo' ,'salary',70000), + named_struct('name','Hary' ,'salary',50000) + ) as employee +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sort_array_by_order_wrong +POSTHOOK: Lineage: sort_array_by_order_wrong.company SIMPLE [] +POSTHOOK: Lineage: sort_array_by_order_wrong.employee EXPRESSION [] +FAILED: RuntimeException cannot find field firstname from [0:name, 1:salary] diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index a811747..5e764b0 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -194,6 +194,7 @@ sign sin size sort_array +sort_array_by soundex space split diff --git ql/src/test/results/clientpositive/udf_sort_array_by.q.out ql/src/test/results/clientpositive/udf_sort_array_by.q.out new file mode 100644 index 0000000..7778d1e --- /dev/null +++ ql/src/test/results/clientpositive/udf_sort_array_by.q.out @@ -0,0 +1,401 @@ +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: -- Test sort_array_by() UDF + +DESCRIBE FUNCTION sort_array_by +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: -- Test sort_array_by() UDF + +DESCRIBE FUNCTION sort_array_by +POSTHOOK: type: DESCFUNCTION +sort_array_by(array(obj1, obj2,...),'f1','f2',...,['ASC','DESC']) - Sorts the input tuple array in user specified order(ASC,DESC) by desired field[s] name If sorting order is not mentioned by user then dafault sorting order is ascending +PREHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array_by +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array_by +POSTHOOK: type: DESCFUNCTION +sort_array_by(array(obj1, obj2,...),'f1','f2',...,['ASC','DESC']) - Sorts the input tuple array in user specified order(ASC,DESC) by desired field[s] name If sorting order is not mentioned by user then dafault sorting order is ascending +Example: + > SELECT sort_array_by(array(struct('g',100),struct('b',200)),'col1','ASC') FROM src LIMIT 1; + array(struct('b',200),struct('g',100)) +PREHOOK: query: DROP TABLE IF EXISTS sort_array_by_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS sort_array_by_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sort_array_by_table +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('empId',800 ,'name','Able' ,'age',28 ,'salary',80000), + named_struct('empId',756 ,'name','Able' ,'age',23 ,'salary',76889), + named_struct('empId',100 ,'name','Boo' ,'age',21 ,'salary',70000), + named_struct('empId',130 ,'name','Boo' ,'age',22 ,'salary',79000), + named_struct('empId',900 ,'name','Hary' ,'age',21 ,'salary',50000), + named_struct('empId',76 ,'name','Hary' ,'age',87 ,'salary',10000) + ) as employee, + "IN" as country +UNION ALL +SELECT "Facebook" as company, + array( + named_struct('empId',200 ,'name','Keiko' ,'age',28 ,'salary',80000), + named_struct('empId',206 ,'name','Keiko' ,'age',41 ,'salary',80500), + named_struct('empId',390 ,'name','Ben' ,'age',21 ,'salary',70000), + named_struct('empId',310 ,'name','Ben' ,'age',31 ,'salary',21000), + named_struct('empId',700 ,'name','Aron' ,'age',21 ,'salary',50000), + named_struct('empId',320 ,'name','Aron' ,'age',18 ,'salary',70000) + ) as employee, + "US" as country +UNION ALL +SELECT "Microsoft" as company, + array( + named_struct('empId',900 ,'name','Spiro' ,'age',28 ,'salary',80000), + named_struct('empId',300 ,'name','Spiro' ,'age',38 ,'salary',80300), + named_struct('empId',600 ,'name','James' ,'age',21 ,'salary',70000), + named_struct('empId',313 ,'name','James' ,'age',11 ,'salary',30000), + named_struct('empId',260 ,'name','Eden' ,'age',31 ,'salary',50020), + named_struct('empId',730 ,'name','Eden' ,'age',45 ,'salary',20300) + ) as employee, + "UK" as country +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@sort_array_by_table +POSTHOOK: query: CREATE TABLE sort_array_by_table +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('empId',800 ,'name','Able' ,'age',28 ,'salary',80000), + named_struct('empId',756 ,'name','Able' ,'age',23 ,'salary',76889), + named_struct('empId',100 ,'name','Boo' ,'age',21 ,'salary',70000), + named_struct('empId',130 ,'name','Boo' ,'age',22 ,'salary',79000), + named_struct('empId',900 ,'name','Hary' ,'age',21 ,'salary',50000), + named_struct('empId',76 ,'name','Hary' ,'age',87 ,'salary',10000) + ) as employee, + "IN" as country +UNION ALL +SELECT "Facebook" as company, + array( + named_struct('empId',200 ,'name','Keiko' ,'age',28 ,'salary',80000), + named_struct('empId',206 ,'name','Keiko' ,'age',41 ,'salary',80500), + named_struct('empId',390 ,'name','Ben' ,'age',21 ,'salary',70000), + named_struct('empId',310 ,'name','Ben' ,'age',31 ,'salary',21000), + named_struct('empId',700 ,'name','Aron' ,'age',21 ,'salary',50000), + named_struct('empId',320 ,'name','Aron' ,'age',18 ,'salary',70000) + ) as employee, + "US" as country +UNION ALL +SELECT "Microsoft" as company, + array( + named_struct('empId',900 ,'name','Spiro' ,'age',28 ,'salary',80000), + named_struct('empId',300 ,'name','Spiro' ,'age',38 ,'salary',80300), + named_struct('empId',600 ,'name','James' ,'age',21 ,'salary',70000), + named_struct('empId',313 ,'name','James' ,'age',11 ,'salary',30000), + named_struct('empId',260 ,'name','Eden' ,'age',31 ,'salary',50020), + named_struct('empId',730 ,'name','Eden' ,'age',45 ,'salary',20300) + ) as employee, + "UK" as country +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sort_array_by_table +POSTHOOK: Lineage: sort_array_by_table.company EXPRESSION [] +POSTHOOK: Lineage: sort_array_by_table.country EXPRESSION [] +POSTHOOK: Lineage: sort_array_by_table.employee EXPRESSION [] +PREHOOK: query: --Sort tuple array by field name:salary(single column) by default ascending order +select company,country,sort_array_by(employee,'salary') as single_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field name:salary(single column) by default ascending order +select company,country,sort_array_by(employee,'salary') as single_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":800,"name":"Able","age":28,"salary":80000}] +Facebook US [{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] +PREHOOK: query: --Sort tuple array by field name:salary(single column) by ascending order +select company,country,sort_array_by(employee,'salary','ASC') as single_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field name:salary(single column) by ascending order +select company,country,sort_array_by(employee,'salary','ASC') as single_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":800,"name":"Able","age":28,"salary":80000}] +Facebook US [{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] +PREHOOK: query: --Sort tuple array by field name:salary(single column) by descending order +select company,country,sort_array_by(employee,'salary','desc') as single_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field name:salary(single column) by descending order +select company,country,sort_array_by(employee,'salary','desc') as single_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":76,"name":"Hary","age":87,"salary":10000}] +Facebook US [{"empid":206,"name":"Keiko","age":41,"salary":80500},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":310,"name":"Ben","age":31,"salary":21000}] +Microsoft UK [{"empid":300,"name":"Spiro","age":38,"salary":80300},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":730,"name":"Eden","age":45,"salary":20300}] +PREHOOK: query: --Above three in one query +select company,country, +sort_array_by(employee,'salary') as single_field_sort, +sort_array_by(employee,'salary','ASC') as single_field_sort_asc, +sort_array_by(employee,'salary','DESC') as single_field_sort_desc +from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Above three in one query +select company,country, +sort_array_by(employee,'salary') as single_field_sort, +sort_array_by(employee,'salary','ASC') as single_field_sort_asc, +sort_array_by(employee,'salary','DESC') as single_field_sort_desc +from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":800,"name":"Able","age":28,"salary":80000}] [{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":800,"name":"Able","age":28,"salary":80000}] [{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":76,"name":"Hary","age":87,"salary":10000}] +Facebook US [{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] [{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] [{"empid":206,"name":"Keiko","age":41,"salary":80500},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":310,"name":"Ben","age":31,"salary":21000}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] [{"empid":300,"name":"Spiro","age":38,"salary":80300},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":730,"name":"Eden","age":45,"salary":20300}] +PREHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) by default ascending order +select company,country,sort_array_by(employee,'name','salary') as multiple_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) by default ascending order +select company,country,sort_array_by(employee,'name','salary') as multiple_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000}] +Facebook US [{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] +PREHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) ascending order +select company,country,sort_array_by(employee,'name','salary','asc') as multiple_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) ascending order +select company,country,sort_array_by(employee,'name','salary','asc') as multiple_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000}] +Facebook US [{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] +PREHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) descending order +select company,country,sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Sort tuple array by field names : name,salary(multiple columns) descending order +select company,country,sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":756,"name":"Able","age":23,"salary":76889}] +Facebook US [{"empid":206,"name":"Keiko","age":41,"salary":80500},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":700,"name":"Aron","age":21,"salary":50000}] +Microsoft UK [{"empid":300,"name":"Spiro","age":38,"salary":80300},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":730,"name":"Eden","age":45,"salary":20300}] +PREHOOK: query: --Above three in one query +select company,country, +sort_array_by(employee,'name','salary') as multiple_field_sort, +sort_array_by(employee,'name','salary','ASC') as multiple_field_sort_asc, +sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort_desc +from sort_array_by_table +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +POSTHOOK: query: --Above three in one query +select company,country, +sort_array_by(employee,'name','salary') as multiple_field_sort, +sort_array_by(employee,'name','salary','ASC') as multiple_field_sort_asc, +sort_array_by(employee,'name',"salary","DESC") as multiple_field_sort_desc +from sort_array_by_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table +#### A masked pattern was here #### +Google IN [{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000}] [{"empid":756,"name":"Able","age":23,"salary":76889},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":900,"name":"Hary","age":21,"salary":50000}] [{"empid":900,"name":"Hary","age":21,"salary":50000},{"empid":76,"name":"Hary","age":87,"salary":10000},{"empid":130,"name":"Boo","age":22,"salary":79000},{"empid":100,"name":"Boo","age":21,"salary":70000},{"empid":800,"name":"Able","age":28,"salary":80000},{"empid":756,"name":"Able","age":23,"salary":76889}] +Facebook US [{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] [{"empid":700,"name":"Aron","age":21,"salary":50000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":206,"name":"Keiko","age":41,"salary":80500}] [{"empid":206,"name":"Keiko","age":41,"salary":80500},{"empid":200,"name":"Keiko","age":28,"salary":80000},{"empid":390,"name":"Ben","age":21,"salary":70000},{"empid":310,"name":"Ben","age":31,"salary":21000},{"empid":320,"name":"Aron","age":18,"salary":70000},{"empid":700,"name":"Aron","age":21,"salary":50000}] +Microsoft UK [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] [{"empid":730,"name":"Eden","age":45,"salary":20300},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":300,"name":"Spiro","age":38,"salary":80300}] [{"empid":300,"name":"Spiro","age":38,"salary":80300},{"empid":900,"name":"Spiro","age":28,"salary":80000},{"empid":600,"name":"James","age":21,"salary":70000},{"empid":313,"name":"James","age":11,"salary":30000},{"empid":260,"name":"Eden","age":31,"salary":50020},{"empid":730,"name":"Eden","age":45,"salary":20300}] +PREHOOK: query: -- Test for order name ('ASC' and 'DESC') as tuple field names and and order name +DROP TABLE IF EXISTS sort_array_by_order_name +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test for order name ('ASC' and 'DESC') as tuple field names and and order name +DROP TABLE IF EXISTS sort_array_by_order_name +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sort_array_by_order_name +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('asc','Able' ,'DESC','Keiko','salary',28), + named_struct('asc','Boo' ,'DESC','Aron','salary',70000), + named_struct('asc','Hary' ,'DESC','James' ,'salary',50000) + ) as employee +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@sort_array_by_order_name +POSTHOOK: query: CREATE TABLE sort_array_by_order_name +STORED AS TEXTFILE +AS +SELECT "Google" as company, + array( + named_struct('asc','Able' ,'DESC','Keiko','salary',28), + named_struct('asc','Boo' ,'DESC','Aron','salary',70000), + named_struct('asc','Hary' ,'DESC','James' ,'salary',50000) + ) as employee +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sort_array_by_order_name +POSTHOOK: Lineage: sort_array_by_order_name.company SIMPLE [] +POSTHOOK: Lineage: sort_array_by_order_name.employee EXPRESSION [] +PREHOOK: query: -- select asc,desc as filed name with default sorting +select +company, +sort_array_by(employee,'asc') as col1, +sort_array_by(employee,'DESC') as col2 +from sort_array_by_order_name +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_order_name +#### A masked pattern was here #### +POSTHOOK: query: -- select asc,desc as filed name with default sorting +select +company, +sort_array_by(employee,'asc') as col1, +sort_array_by(employee,'DESC') as col2 +from sort_array_by_order_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_order_name +#### A masked pattern was here #### +Google [{"asc":"Able","desc":"Keiko","salary":28},{"asc":"Boo","desc":"Aron","salary":70000},{"asc":"Hary","desc":"James","salary":50000}] [{"asc":"Boo","desc":"Aron","salary":70000},{"asc":"Hary","desc":"James","salary":50000},{"asc":"Able","desc":"Keiko","salary":28}] +PREHOOK: query: --select asc,desc as field name and explicitly provided sorting ordering. +--If argument length's size are more than two (first: tuple list,second: desired minimum a field name) +--then we always check whether the last argument is any sorting order name(ASC or DESC) +select +company, +sort_array_by(employee,'asc','ASC') as col1, +sort_array_by(employee,'DESC','desc') as col2 +from +sort_array_by_order_name +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_order_name +#### A masked pattern was here #### +POSTHOOK: query: --select asc,desc as field name and explicitly provided sorting ordering. +--If argument length's size are more than two (first: tuple list,second: desired minimum a field name) +--then we always check whether the last argument is any sorting order name(ASC or DESC) +select +company, +sort_array_by(employee,'asc','ASC') as col1, +sort_array_by(employee,'DESC','desc') as col2 +from +sort_array_by_order_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_order_name +#### A masked pattern was here #### +Google [{"asc":"Able","desc":"Keiko","salary":28},{"asc":"Boo","desc":"Aron","salary":70000},{"asc":"Hary","desc":"James","salary":50000}] [{"asc":"Able","desc":"Keiko","salary":28},{"asc":"Hary","desc":"James","salary":50000},{"asc":"Boo","desc":"Aron","salary":70000}] +PREHOOK: query: -- similarity of sorting order check between this UDF and LATERAL VIEW explode(array). + +DROP TABLE IF EXISTS sort_array_by_table_order +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- similarity of sorting order check between this UDF and LATERAL VIEW explode(array). + +DROP TABLE IF EXISTS sort_array_by_table_order +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sort_array_by_table_order +STORED AS TEXTFILE +AS +SELECT array( + named_struct('name','Able' ,'age',28), + named_struct('name','Able' ,'age',23), + named_struct('name','Boo' ,'age',21), + named_struct('name','Boo' ,'age',22), + named_struct('name','Hary' ,'age',21), + named_struct('name','Hary' ,'age',87) + ) as a_struct_array +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@sort_array_by_table_order +POSTHOOK: query: CREATE TABLE sort_array_by_table_order +STORED AS TEXTFILE +AS +SELECT array( + named_struct('name','Able' ,'age',28), + named_struct('name','Able' ,'age',23), + named_struct('name','Boo' ,'age',21), + named_struct('name','Boo' ,'age',22), + named_struct('name','Hary' ,'age',21), + named_struct('name','Hary' ,'age',87) + ) as a_struct_array +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sort_array_by_table_order +POSTHOOK: Lineage: sort_array_by_table_order.a_struct_array EXPRESSION [] +PREHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +POSTHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +{"name":"Hary","age":87} +{"name":"Hary","age":21} +{"name":"Boo","age":22} +{"name":"Boo","age":21} +{"name":"Able","age":23} +{"name":"Able","age":28} +PREHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name', 'DESC')) structTable AS a_struct +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +POSTHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name', 'DESC')) structTable AS a_struct +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +{"name":"Hary","age":21} +{"name":"Hary","age":87} +{"name":"Boo","age":21} +{"name":"Boo","age":22} +{"name":"Able","age":28} +{"name":"Able","age":23} +PREHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC,a_struct.age DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +POSTHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(a_struct_array) structTable AS a_struct ORDER BY a_struct.name DESC,a_struct.age DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +{"name":"Hary","age":87} +{"name":"Hary","age":21} +{"name":"Boo","age":22} +{"name":"Boo","age":21} +{"name":"Able","age":28} +{"name":"Able","age":23} +PREHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name','age', 'DESC')) structTable AS a_struct +PREHOOK: type: QUERY +PREHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +POSTHOOK: query: SELECT a_struct FROM sort_array_by_table_order LATERAL VIEW explode(sort_array_by(a_struct_array, 'name','age', 'DESC')) structTable AS a_struct +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sort_array_by_table_order +#### A masked pattern was here #### +{"name":"Hary","age":87} +{"name":"Hary","age":21} +{"name":"Boo","age":22} +{"name":"Boo","age":21} +{"name":"Able","age":28} +{"name":"Able","age":23}