diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 16a207e..ae2fd17 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -122,7 +122,6 @@ import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEWAHBitmap; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFContextNGrams; @@ -130,6 +129,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovarianceSample; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEWAHBitmap; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; @@ -147,13 +147,13 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFField;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
@@ -163,6 +163,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapKeys;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapValues;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
@@ -416,6 +418,8 @@ public final class FunctionRegistry {
     registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
     registerGenericUDF("array_contains", GenericUDFArrayContains.class);
     registerGenericUDF("sentences", GenericUDFSentences.class);
+    registerGenericUDF("map_keys", GenericUDFMapKeys.class);
+    registerGenericUDF("map_values", GenericUDFMapValues.class);
 
     // Generic UDTF's
     registerGenericUDTF("explode", GenericUDTFExplode.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapKeys.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapKeys.java
new file mode 100644
index 0000000..b384717
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapKeys.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+
+/**
+ * GenericUDFMapKeys.
+ *
+ * Implements map_keys(map): returns the keys of the argument map as an array,
+ * in whatever order the map's key set iterates. If the inspector yields no map
+ * for the argument (a NULL map value), the result is an empty array.
+ */
+@Description(name = "map_keys", value = "_FUNC_(map) - "
+    + "Returns an unordered array containing the keys of the input map.")
+public class GenericUDFMapKeys extends GenericUDF {
+  // Inspector for the single map argument; assigned in initialize().
+  private MapObjectInspector mapOI;
+  // Result buffer, cleared and refilled on every evaluate() call.
+  private final ArrayList<Object> retArray = new ArrayList<Object>();
+
+  /**
+   * Validates that exactly one map-typed argument was supplied and returns a
+   * standard list inspector built over the map's key inspector.
+   *
+   * @throws UDFArgumentLengthException if the argument count is not 1
+   * @throws UDFArgumentTypeException if the argument is not a map
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException("The function MAP_KEYS only accepts one argument.");
+    } else if (!(arguments[0] instanceof MapObjectInspector)) {
+      throw new UDFArgumentTypeException(0, "\""
+          + Category.MAP.toString().toLowerCase()
+          + "\" is expected at function MAP_KEYS, " + "but \""
+          + arguments[0].getTypeName() + "\" is found");
+    }
+
+    mapOI = (MapObjectInspector) arguments[0];
+    ObjectInspector mapKeyOI = mapOI.getMapKeyObjectInspector();
+    return ObjectInspectorFactory.getStandardListObjectInspector(mapKeyOI);
+  }
+
+  /**
+   * Copies the keys of the argument map into the reusable result list.
+   *
+   * @return the shared result list; empty when the argument map is NULL
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    retArray.clear();
+    // getMap() returns null for a NULL map value; guard so a NULL column
+    // produces an empty array instead of a NullPointerException.
+    Map<?, ?> map = mapOI.getMap(arguments[0].get());
+    if (map != null) {
+      retArray.addAll(map.keySet());
+    }
+    return retArray;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert children.length == 1;
+    return "map_keys(" + children[0] + ")";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
new file mode 100644
index 0000000..73812cb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+
+/**
+ * GenericUDFMapValues.
+ *
+ * Implements map_values(map): returns the values of the argument map as an
+ * array, in whatever order the map's value collection iterates. If the inspector
+ * yields no map for the argument (a NULL map value), the result is an empty array.
+ */
+@Description(name = "map_values", value = "_FUNC_(map) - "
+    + "Returns an unordered array containing the values of the input map.")
+public class GenericUDFMapValues extends GenericUDF {
+  // Inspector for the single map argument; assigned in initialize().
+  private MapObjectInspector mapOI;
+  // Result buffer, cleared and refilled on every evaluate() call.
+  private final ArrayList<Object> retArray = new ArrayList<Object>();
+
+  /**
+   * Validates that exactly one map-typed argument was supplied and returns a
+   * standard list inspector built over the map's value inspector.
+   *
+   * @throws UDFArgumentLengthException if the argument count is not 1
+   * @throws UDFArgumentTypeException if the argument is not a map
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException("The function MAP_VALUES only accepts 1 argument.");
+    } else if (!(arguments[0] instanceof MapObjectInspector)) {
+      throw new UDFArgumentTypeException(0, "\""
+          + Category.MAP.toString().toLowerCase()
+          + "\" is expected at function MAP_VALUES, " + "but \""
+          + arguments[0].getTypeName() + "\" is found");
+    }
+
+    mapOI = (MapObjectInspector) arguments[0];
+    ObjectInspector mapValueOI = mapOI.getMapValueObjectInspector();
+    return ObjectInspectorFactory.getStandardListObjectInspector(mapValueOI);
+  }
+
+  /**
+   * Copies the values of the argument map into the reusable result list.
+   *
+   * @return the shared result list; empty when the argument map is NULL
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    retArray.clear();
+    // getMap() returns null for a NULL map value; guard so a NULL column
+    // produces an empty array instead of a NullPointerException.
+    Map<?, ?> map = mapOI.getMap(arguments[0].get());
+    if (map != null) {
+      retArray.addAll(map.values());
+    }
+    return retArray;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert children.length == 1;
+    return "map_values(" + children[0] + ")";
+  }
+}
diff --git ql/src/test/queries/clientnegative/udf_map_keys_arg_num.q ql/src/test/queries/clientnegative/udf_map_keys_arg_num.q
new file mode 100644
index 0000000..ebb6c2a
--- /dev/null
+++ ql/src/test/queries/clientnegative/udf_map_keys_arg_num.q
@@ -0,0 +1 @@
+SELECT map_keys(map("a", "1"), map("b", "2")) FROM src LIMIT 1;
diff --git ql/src/test/queries/clientnegative/udf_map_keys_arg_type.q ql/src/test/queries/clientnegative/udf_map_keys_arg_type.q
new file mode 100644
index 0000000..0757d14
--- /dev/null
+++ ql/src/test/queries/clientnegative/udf_map_keys_arg_type.q
@@ -0,0 +1 @@
+SELECT map_keys(array(1, 2, 3)) FROM src LIMIT 1;
diff --git
ql/src/test/queries/clientnegative/udf_map_values_arg_num.q ql/src/test/queries/clientnegative/udf_map_values_arg_num.q new file mode 100644 index 0000000..c97476a --- /dev/null +++ ql/src/test/queries/clientnegative/udf_map_values_arg_num.q @@ -0,0 +1 @@ +SELECT map_values(map("a", "1"), map("b", "2")) FROM src LIMIT 1; diff --git ql/src/test/queries/clientnegative/udf_map_values_arg_type.q ql/src/test/queries/clientnegative/udf_map_values_arg_type.q new file mode 100644 index 0000000..cc060ea --- /dev/null +++ ql/src/test/queries/clientnegative/udf_map_values_arg_type.q @@ -0,0 +1 @@ +SELECT map_values(array(1, 2, 3, 4)) FROM src LIMIT 1; diff --git ql/src/test/queries/clientpositive/udf_map_keys.q ql/src/test/queries/clientpositive/udf_map_keys.q new file mode 100644 index 0000000..4e7999a --- /dev/null +++ ql/src/test/queries/clientpositive/udf_map_keys.q @@ -0,0 +1,12 @@ +use default; +-- Test map_keys() UDF + +DESCRIBE FUNCTION map_keys; +DESCRIBE FUNCTION EXTENDED map_keys; + +-- Evaluate function against INT valued keys +SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1; + +-- Evaluate function against STRING valued keys +SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1; + diff --git ql/src/test/queries/clientpositive/udf_map_values.q ql/src/test/queries/clientpositive/udf_map_values.q new file mode 100644 index 0000000..e25b9bc --- /dev/null +++ ql/src/test/queries/clientpositive/udf_map_values.q @@ -0,0 +1,11 @@ +use default; +-- Test map_values() UDF + +DESCRIBE FUNCTION map_values; +DESCRIBE FUNCTION EXTENDED map_values; + +-- Evaluate function against STRING valued values +SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1; + +-- Evaluate function against INT valued keys +SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1; diff --git ql/src/test/results/clientnegative/udf_map_keys_arg_num.q.out ql/src/test/results/clientnegative/udf_map_keys_arg_num.q.out new file mode 100644 index 0000000..5951998 --- 
/dev/null +++ ql/src/test/results/clientnegative/udf_map_keys_arg_num.q.out @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 1:7 Arguments length mismatch '"2"': The function MAP_KEYS only accepts one argument. diff --git ql/src/test/results/clientnegative/udf_map_keys_arg_type.q.out ql/src/test/results/clientnegative/udf_map_keys_arg_type.q.out new file mode 100644 index 0000000..3f32522 --- /dev/null +++ ql/src/test/results/clientnegative/udf_map_keys_arg_type.q.out @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 1:16 Argument type mismatch '3': "map" is expected at function MAP_KEYS, but "array" is found diff --git ql/src/test/results/clientnegative/udf_map_values_arg_num.q.out ql/src/test/results/clientnegative/udf_map_values_arg_num.q.out new file mode 100644 index 0000000..33e05c9 --- /dev/null +++ ql/src/test/results/clientnegative/udf_map_values_arg_num.q.out @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 1:7 Arguments length mismatch '"2"': The function MAP_VALUES only accepts 1 argument. 
diff --git ql/src/test/results/clientnegative/udf_map_values_arg_type.q.out ql/src/test/results/clientnegative/udf_map_values_arg_type.q.out new file mode 100644 index 0000000..54f4436 --- /dev/null +++ ql/src/test/results/clientnegative/udf_map_values_arg_type.q.out @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Line 1:18 Argument type mismatch '4': "map" is expected at function MAP_VALUES, but "array" is found diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index d4f6c98..a194779 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -91,6 +91,8 @@ lower lpad ltrim map +map_keys +map_values max min minute diff --git ql/src/test/results/clientpositive/udf_map_keys.q.out ql/src/test/results/clientpositive/udf_map_keys.q.out new file mode 100644 index 0000000..cd1e862 --- /dev/null +++ ql/src/test/results/clientpositive/udf_map_keys.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: -- Test map_keys() UDF + +DESCRIBE FUNCTION map_keys +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: -- Test map_keys() UDF + +DESCRIBE FUNCTION map_keys +POSTHOOK: type: DESCFUNCTION +map_keys(map) - Returns an unordered array containing the keys of the input map. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED map_keys +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED map_keys +POSTHOOK: type: DESCFUNCTION +map_keys(map) - Returns an unordered array containing the keys of the input map. 
+PREHOOK: query: -- Evaluate function against INT valued keys +SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-17_658_751510346641456894/-mr-10000 +POSTHOOK: query: -- Evaluate function against INT valued keys +SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-17_658_751510346641456894/-mr-10000 +[1,2,3] +PREHOOK: query: -- Evaluate function against STRING valued keys +SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-25_234_6070955623455878264/-mr-10000 +POSTHOOK: query: -- Evaluate function against STRING valued keys +SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-25_234_6070955623455878264/-mr-10000 +["b","a","c"] diff --git ql/src/test/results/clientpositive/udf_map_values.q.out ql/src/test/results/clientpositive/udf_map_values.q.out new file mode 100644 index 0000000..685b4c8 --- /dev/null +++ ql/src/test/results/clientpositive/udf_map_values.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: -- Test map_values() UDF + +DESCRIBE FUNCTION map_values +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: -- Test map_values() UDF + +DESCRIBE FUNCTION map_values +POSTHOOK: type: DESCFUNCTION +map_values(map) - Returns an unordered array containing the values of the input map. 
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED map_values +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED map_values +POSTHOOK: type: DESCFUNCTION +map_values(map) - Returns an unordered array containing the values of the input map. +PREHOOK: query: -- Evaluate function against STRING valued values +SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-30_831_5144047215476063078/-mr-10000 +POSTHOOK: query: -- Evaluate function against STRING valued values +SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-30_831_5144047215476063078/-mr-10000 +["a","b","c"] +PREHOOK: query: -- Evaluate function against INT valued keys +SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-37_992_5312993947281209093/-mr-10000 +POSTHOOK: query: -- Evaluate function against INT valued keys +SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/b7/b7UUwNZdF1KKHtM+5la6f++++TI/-Tmp-/carl/hive_2011-07-11_15-35-37_992_5312993947281209093/-mr-10000 +[2,1,3]