diff --git eclipse-templates/.settings/org.eclipse.jdt.ui.prefs eclipse-templates/.settings/org.eclipse.jdt.ui.prefs
index a6391d3..3aa7a3d 100644
--- eclipse-templates/.settings/org.eclipse.jdt.ui.prefs
+++ eclipse-templates/.settings/org.eclipse.jdt.ui.prefs
@@ -32,7 +32,7 @@ cleanup.remove_private_constructors=true
 cleanup.remove_trailing_whitespaces=true
 cleanup.remove_trailing_whitespaces_all=true
 cleanup.remove_trailing_whitespaces_ignore_empty=false
-cleanup.remove_unnecessary_casts=true
+cleanup.remove_unnecessary_casts=false
 cleanup.remove_unnecessary_nls_tags=true
 cleanup.remove_unused_imports=true
 cleanup.remove_unused_local_variables=true
@@ -91,7 +91,7 @@ sp_cleanup.remove_private_constructors=true
 sp_cleanup.remove_trailing_whitespaces=true
 sp_cleanup.remove_trailing_whitespaces_all=true
 sp_cleanup.remove_trailing_whitespaces_ignore_empty=false
-sp_cleanup.remove_unnecessary_casts=true
+sp_cleanup.remove_unnecessary_casts=false
 sp_cleanup.remove_unnecessary_nls_tags=false
 sp_cleanup.remove_unused_imports=true
 sp_cleanup.remove_unused_local_variables=false
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 6b28be5..d66967a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -137,6 +137,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
@@ -358,6 +359,7 @@ public final class FunctionRegistry {
     registerGenericUDF("locate", GenericUDFLocate.class);
     registerGenericUDF("elt", GenericUDFElt.class);
     registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
+    registerGenericUDF("array_contains", GenericUDFArrayContains.class);

     // Generic UDTF's
     registerGenericUDTF("explode", GenericUDTFExplode.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java
new file mode 100644
index 0000000..d60dc86
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
+
+/**
+ * GenericUDFArrayContains.
+ *
+ */
+@Description(name = "array_contains",
+    value="_FUNC_(array, value) - Returns TRUE if the array contains value.",
+    extended="Example:\n"
+        + "  > SELECT _FUNC_(array(1, 2, 3), 2) FROM src LIMIT 1;\n"
+        + "  true")
+public class GenericUDFArrayContains extends GenericUDF {
+
+  private static final int ARRAY_IDX = 0;
+  private static final int VALUE_IDX = 1;
+  private static final int ARG_COUNT = 2; // Number of arguments to this UDF
+  private static final String FUNC_NAME = "ARRAY_CONTAINS"; // External Name
+
+  private ObjectInspector valueOI;
+  private ListObjectInspector arrayOI;
+  private ObjectInspector arrayElementOI;
+  private BooleanWritable result;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+
+    // Check if two arguments were passed
+    if (arguments.length != ARG_COUNT) {
+      throw new UDFArgumentException(
+          "The function " + FUNC_NAME + " accepts "
+              + ARG_COUNT + " arguments.");
+    }
+
+    // Check if ARRAY_IDX argument is of category LIST
+    if (!arguments[ARRAY_IDX].getCategory().equals(Category.LIST)) {
+      throw new UDFArgumentTypeException(ARRAY_IDX,
+          "\"" + org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME + "\" "
+              + "expected at function ARRAY_CONTAINS, but "
+              + "\"" + arguments[ARRAY_IDX].getTypeName() + "\" "
+              + "is found");
+    }
+
+    arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+    arrayElementOI = arrayOI.getListElementObjectInspector();
+
+    valueOI = arguments[VALUE_IDX];
+
+    // Check if list element and value are of same type
+    if (!ObjectInspectorUtils.compareTypes(arrayElementOI, valueOI)) {
+      throw new UDFArgumentTypeException(VALUE_IDX,
+          "\"" + arrayElementOI.getTypeName() + "\""
+              + " expected at function ARRAY_CONTAINS, but "
+              + "\"" + valueOI.getTypeName() + "\""
+              + " is found");
+    }
+
+    // Check if the comparison is supported for this type
+    if (!ObjectInspectorUtils.compareSupported(valueOI)) {
+      throw new UDFArgumentException("The function " + FUNC_NAME
+          + " does not support comparison for "
+          + "\"" + valueOI.getTypeName() + "\""
+          + " types");
+    }
+
+    result = new BooleanWritable(false);
+
+    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+
+    result.set(false);
+
+    Object array = arguments[ARRAY_IDX].get();
+    Object value = arguments[VALUE_IDX].get();
+
+    int arrayLength = arrayOI.getListLength(array);
+
+    // Check if array is null or empty or value is null
+    if (value == null || arrayLength <= 0) {
+      return result;
+    }
+
+    // Compare the value to each element of array until a match is found
+    for (int i=0; i<arrayLength; ++i) {
+      Object listElement = arrayOI.getListElement(array, i);
+      if (listElement != null
+          && ObjectInspectorUtils.compare(value, valueOI,
+              listElement, arrayElementOI) == 0) {
+        result.set(true);
+        break;
+      }
+    }
+
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == ARG_COUNT);
+    return "array_contains(" + children[ARRAY_IDX] + ", "
+        + children[VALUE_IDX] + ")";
+  }
+}
+  > SELECT array_contains(array(1, 2, 3), 2) FROM src LIMIT 1;
+  true
+PREHOOK: query: -- evaluates function for array of primitives
+SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-55_272_2665604689529775979/10000
+POSTHOOK: query: -- evaluates function for array of primitives
+SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-55_272_2665604689529775979/10000
+true
+PREHOOK: query: -- evaluates function for nested arrays
+SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2))
+FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-59_607_7406750007328575030/10000
+POSTHOOK: query: -- evaluates function for nested arrays
+SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2))
+FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-59_607_7406750007328575030/10000
+true
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 84b7f4c..e207442 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -23,11 +23,13 @@ import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -614,6 +616,109 @@ public final class ObjectInspectorUtils {
     }
   }

+  /**
+   * Compares two types identified by the given object inspectors. This method
+   * compares the types as follows:
+   *   1. If the given inspectors do not belong to the same category, the
+   *      result is false.
+   *   2. If the given inspectors are for the PRIMITIVE type, the result is
+   *      the comparison of their type names.
+   *   3. If the given inspectors are for the LIST type, then the result is a
+   *      recursive call to compare the types of the list elements.
+   *   4. If the given inspectors are for the MAP type, then the result is a
+   *      recursive call to compare the map key and value types.
+   *   5. If the given inspectors are for the STRUCT type, then the result is
+   *      false if they do not have the same number of fields. If they do
+   *      have the same number of fields, the result is a recursive call to
+   *      compare each of the field types.
+   *   6. If none of the above, the result is false.
+   *
+   * @param o1
+   * @param o2
+   * @return true if the given object inspectors represent the same types.
+   */
+  public static boolean compareTypes(ObjectInspector o1, ObjectInspector o2) {
+    Category c1 = o1.getCategory();
+    Category c2 = o2.getCategory();
+
+    // Return false if categories are not equal
+    if (!c1.equals(c2)) {
+      return false;
+    }
+
+    // If both categories are primitive, return the comparison of type names.
+    if (c1.equals(Category.PRIMITIVE)) {
+      return o1.getTypeName().equals(o2.getTypeName());
+    }
+
+    // If lists, recursively compare the list element types
+    if (c1.equals(Category.LIST)) {
+      ObjectInspector child1 =
+          ((ListObjectInspector) o1).getListElementObjectInspector();
+      ObjectInspector child2 =
+          ((ListObjectInspector) o2).getListElementObjectInspector();
+      return compareTypes(child1, child2);
+    }
+
+    // If maps, recursively compare the key and value types
+    if (c1.equals(Category.MAP)) {
+      MapObjectInspector mapOI1 = (MapObjectInspector) o1;
+      MapObjectInspector mapOI2 = (MapObjectInspector) o2;
+
+      ObjectInspector childKey1 = mapOI1.getMapKeyObjectInspector();
+      ObjectInspector childKey2 = mapOI2.getMapKeyObjectInspector();
+
+      if (compareTypes(childKey1, childKey2)) {
+        ObjectInspector childVal1 = mapOI1.getMapValueObjectInspector();
+        ObjectInspector childVal2 = mapOI2.getMapValueObjectInspector();
+
+        if (compareTypes(childVal1, childVal2)) {
+          return true;
+        }
+      }
+
+      return false;
+    }
+
+    // If structs, recursively compare the fields
+    if (c1.equals(Category.STRUCT)) {
+      StructObjectInspector structOI1 = (StructObjectInspector) o1;
+      StructObjectInspector structOI2 = (StructObjectInspector) o2;
+
+      List<? extends StructField> childFieldsList1
+          = structOI1.getAllStructFieldRefs();
+      List<? extends StructField> childFieldsList2
+          = structOI2.getAllStructFieldRefs();
+
+      if (childFieldsList1 == null && childFieldsList2 == null) {
+        return true;
+      } else if (childFieldsList1 == null || childFieldsList2 == null) {
+        return false;
+      }
+
+      if (childFieldsList1.size() != childFieldsList2.size()) {
+        return false;
+      }
+
+      Iterator<? extends StructField> it1 = childFieldsList1.iterator();
+      Iterator<? extends StructField> it2 = childFieldsList2.iterator();
+      while (it1.hasNext()) {
+        StructField field1 = it1.next();
+        StructField field2 = it2.next();
+
+        if (!compareTypes(field1.getFieldObjectInspector(),
+            field2.getFieldObjectInspector())) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    // Unknown category
+    throw new RuntimeException("Unknown category encountered: " + c1);
+  }
+
   private ObjectInspectorUtils() {
     // prevent instantiation
   }
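For reviewers trying the patch out, the standalone sketch below (not part of the patch; the class name CompareTypesSketch is invented for illustration) exercises the two checks that GenericUDFArrayContains delegates to: the new ObjectInspectorUtils.compareTypes() used in initialize() to verify the value argument matches the array's element type, and the existing ObjectInspectorUtils.compare() used in evaluate() to test element equality. It only relies on the standard inspector factories that already ship in hive-serde.

// CompareTypesSketch.java -- illustrative only, not included in this patch.
import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CompareTypesSketch {
  public static void main(String[] args) {
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    // array<int> vs array<int> is the same type; array<int> vs array<string> is not.
    ListObjectInspector intListOI =
        ObjectInspectorFactory.getStandardListObjectInspector(intOI);
    ListObjectInspector stringListOI =
        ObjectInspectorFactory.getStandardListObjectInspector(stringOI);
    System.out.println(ObjectInspectorUtils.compareTypes(intListOI, intListOI));    // true
    System.out.println(ObjectInspectorUtils.compareTypes(intListOI, stringListOI)); // false

    // The per-element check evaluate() performs: compare(...) == 0 means "equal".
    Object array = Arrays.asList(1, 2, 3);
    Object element = intListOI.getListElement(array, 1); // Integer 2
    System.out.println(ObjectInspectorUtils.compare(2, intOI, element, intOI) == 0); // true
  }
}

With the patch applied, this should print true, false, true.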