diff --git eclipse-templates/.settings/org.eclipse.jdt.ui.prefs eclipse-templates/.settings/org.eclipse.jdt.ui.prefs index a6391d3..3aa7a3d 100644 --- eclipse-templates/.settings/org.eclipse.jdt.ui.prefs +++ eclipse-templates/.settings/org.eclipse.jdt.ui.prefs @@ -32,7 +32,7 @@ cleanup.remove_private_constructors=true cleanup.remove_trailing_whitespaces=true cleanup.remove_trailing_whitespaces_all=true cleanup.remove_trailing_whitespaces_ignore_empty=false -cleanup.remove_unnecessary_casts=true +cleanup.remove_unnecessary_casts=false cleanup.remove_unnecessary_nls_tags=true cleanup.remove_unused_imports=true cleanup.remove_unused_local_variables=true @@ -91,7 +91,7 @@ sp_cleanup.remove_private_constructors=true sp_cleanup.remove_trailing_whitespaces=true sp_cleanup.remove_trailing_whitespaces_all=true sp_cleanup.remove_trailing_whitespaces_ignore_empty=false -sp_cleanup.remove_unnecessary_casts=true +sp_cleanup.remove_unnecessary_casts=false sp_cleanup.remove_unnecessary_nls_tags=false sp_cleanup.remove_unused_imports=true sp_cleanup.remove_unused_local_variables=false diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 635746e..a9eadbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -137,6 +137,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; @@ -309,7 +310,7 @@ public final class FunctionRegistry { // 
Aggregate functions registerGenericUDAF("max", new GenericUDAFMax()); registerGenericUDAF("min", new GenericUDAFMin()); - + registerGenericUDAF("sum", new GenericUDAFSum()); registerGenericUDAF("count", new GenericUDAFCount()); registerGenericUDAF("avg", new GenericUDAFAverage()); @@ -323,7 +324,7 @@ public final class FunctionRegistry { registerGenericUDAF("var_samp", new GenericUDAFVarianceSample()); registerUDAF("percentile", UDAFPercentile.class); - + // Generic UDFs registerGenericUDF("array", GenericUDFArray.class); registerGenericUDF("map", GenericUDFMap.class); @@ -338,6 +339,7 @@ public final class FunctionRegistry { registerGenericUDF("locate", GenericUDFLocate.class); registerGenericUDF("elt", GenericUDFElt.class); registerGenericUDF("concat_ws", GenericUDFConcatWS.class); + registerGenericUDF("array_contains", GenericUDFArrayContains.class); // Generic UDTF's registerGenericUDTF("explode", GenericUDTFExplode.class); @@ -427,7 +429,7 @@ public final class FunctionRegistry { /** * Returns a set of registered function names. This is used for the CLI * command "SHOW FUNCTIONS;" - * + * * @return set of strings contains function names */ public static Set getFunctionNames() { @@ -438,7 +440,7 @@ public final class FunctionRegistry { * Returns a set of registered function names. This is used for the CLI * command "SHOW FUNCTIONS 'regular expression';" Returns an empty set when * the regular expression is not valid. - * + * * @param funcPatternStr * regular expression of the interested function names * @return set of strings contains function names @@ -461,7 +463,7 @@ public final class FunctionRegistry { /** * Returns the set of synonyms of the supplied function. - * + * * @param funcName * the name of the function * @return Set of synonyms for funcName @@ -509,10 +511,10 @@ public final class FunctionRegistry { /** * Find a common class that objects of both TypeInfo a and TypeInfo b can * convert to. This is used for comparing objects of type a and type b. 
- * + * * When we are comparing string and double, we will always convert both of * them to double and then compare. - * + * * @return null if no common class could be found. */ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { @@ -533,9 +535,9 @@ public final class FunctionRegistry { /** * Find a common class that objects of both TypeInfo a and TypeInfo b can * convert to. This is used for places other than comparison. - * + * * The common class of string and double is string. - * + * * @return null if no common class could be found. */ public static TypeInfo getCommonClass(TypeInfo a, TypeInfo b) { @@ -581,7 +583,7 @@ public final class FunctionRegistry { /** * Get the GenericUDAF evaluator for the name and argumentClasses. - * + * * @param name * the name of the UDAF * @param argumentTypeInfos @@ -605,7 +607,7 @@ public final class FunctionRegistry { * This method is shared between UDFRegistry and UDAFRegistry. methodName will * be "evaluate" for UDFRegistry, and "aggregate"/"evaluate"/"evaluatePartial" * for UDAFRegistry. - * @throws UDFArgumentException + * @throws UDFArgumentException */ public static Method getMethodInternal(Class udfClass, String methodName, boolean exact, List argumentClasses) @@ -768,7 +770,7 @@ public final class FunctionRegistry { /** * Gets the closest matching method corresponding to the argument list from a * list of methods. - * + * * @param mlist * The list of methods to inspect. 
* @param exact @@ -784,7 +786,7 @@ public final class FunctionRegistry { List udfMethods = new ArrayList(); // The cost of the result int leastConversionCost = Integer.MAX_VALUE; - + for (Method m : mlist) { List argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size()); @@ -828,14 +830,14 @@ public final class FunctionRegistry { } } } - + if (udfMethods.size() == 0) { // No matching methods found - throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist); + throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist); } if (udfMethods.size() > 1) { // Ambiguous method found - throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist); + throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist); } return udfMethods.get(0); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java new file mode 100644 index 0000000..182a0dc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayContains.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; + +/** + * GenericUDFArrayContains. + * + */ +@Description(name = "array_contains", + value="_FUNC_(array, value) - Returns TRUE if the array contains value.", + extended="Example:\n" + + " > SELECT _FUNC_(array(1, 2, 3), 2) FROM src LIMIT 1;\n" + + " true") +public class GenericUDFArrayContains extends GenericUDF { + + private static final int ARRAY_IDX = 0; + private static final int VALUE_IDX = 1; + private static final int ARG_COUNT = 2; // Number of arguments to this UDF + private static final String FUNC_NAME = "ARRAY_CONTAINS"; // External Name + + private ObjectInspector valueOI; + private ListObjectInspector arrayOI; + private ObjectInspector arrayElementOI; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if two arguments were passed + if (arguments.length != ARG_COUNT) { + throw new UDFArgumentException( + "The function " + FUNC_NAME + " accepts " + + ARG_COUNT + " arguments."); + } + + // Check if ARRAY_IDX argument is of category LIST + if (!arguments[ARRAY_IDX].getCategory().equals(Category.LIST)) { + throw new UDFArgumentTypeException(ARRAY_IDX, + "\"" + org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME + "\" " + + "expected at function 
ARRAY_CONTAINS, but " + + "\"" + arguments[ARRAY_IDX].getTypeName() + "\" " + + "is found"); + } + + arrayOI = (ListObjectInspector) arguments[ARRAY_IDX]; + arrayElementOI = arrayOI.getListElementObjectInspector(); + + valueOI = arguments[VALUE_IDX]; + + // Check if list element and value are of same type + if (!ObjectInspectorUtils.compareTypes(arrayElementOI, valueOI)) { + throw new UDFArgumentTypeException(VALUE_IDX, + "\"" + arrayElementOI.getTypeName() + "\"" + + " expected at function ARRAY_CONTAINS, but " + + "\"" + valueOI.getTypeName() + "\"" + + " is found"); + } + + // Check if the comparison is supported for this type + if (!ObjectInspectorUtils.compareSupported(valueOI)) { + throw new UDFArgumentException("The function " + FUNC_NAME + + " does not support comparison for " + + "\"" + valueOI.getTypeName() + "\"" + + " types"); + } + + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + BooleanWritable result = new BooleanWritable(false); + + Object array = arguments[ARRAY_IDX].get(); + Object value = arguments[VALUE_IDX].get(); + + int arrayLength = arrayOI.getListLength(array); + + // Check if array is null or empty or value is null + if (value == null || arrayLength <= 0) { + return result; + } + + // Compare the value to each element of array until a match is found + for (int i=0; i SELECT array_contains(array(1, 2, 3), 2) FROM src LIMIT 1; + true +PREHOOK: query: -- evalutes function for array of primitives +SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-55_272_2665604689529775979/10000 +POSTHOOK: query: -- evalutes function for array of primitives +SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-55_272_2665604689529775979/10000 +true +PREHOOK: query: -- evaluates function for nested arrays +SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-59_607_7406750007328575030/10000 +POSTHOOK: query: -- evaluates function for nested arrays +SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/Cloudera/src/hive/build/ql/scratchdir/hive_2010-03-02_20-08-59_607_7406750007328575030/10000 +true diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 1663b2c..88b82c9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -23,11 +23,13 @@ import java.lang.reflect.Modifier; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -46,7 +48,7 @@ import org.apache.hadoop.util.StringUtils; /** * ObjectInspectorFactory is the primary way to create new ObjectInspector * instances. 
- * + * * SerDe classes should call the static functions in this library to create an * ObjectInspector to return to the caller of SerDe2.getObjectInspector(). */ @@ -68,7 +70,7 @@ public final class ObjectInspectorUtils { /** * Get the corresponding standard ObjectInspector for an ObjectInspector. - * + * * The returned ObjectInspector can be used to inspect the standard object. */ public static ObjectInspector getStandardObjectInspector(ObjectInspector oi) { @@ -424,7 +426,7 @@ public final class ObjectInspectorUtils { return false; } } - + /** * Compare two objects with their respective ObjectInspectors. */ @@ -589,6 +591,107 @@ public final class ObjectInspectorUtils { } } + /** + * Compares two types identified by the given object inspectors. This method + * compares the types as follows: + *
+   * <ol>
+   * <li>If the given inspectors do not belong to same category, the result is
+   * negative.</li>
+   * <li>If the given inspectors are for <code>PRIMITIVE</code> type, the result
+   * is the comparison of their type names.</li>
+   * <li>If the given inspectors are for <code>LIST</code> type, then the result
+   * is recursive call to compare the type of list elements.</li>
+   * <li>If the given inspectors are <code>MAP</code> type, then the result is a
+   * recursive call to compare the map key and value types.</li>
+   * <li>If the given inspectors are <code>STRUCT</code> type, then the result
+   * is negative if they do not have the same number of fields. If they do have
+   * the same number of fields, the result is a recursive call to compare each
+   * of the field types.</li>
+   * <li>If none of the above, the result is negative.</li>
+   * </ol>
+   * @param o1
+   *          object inspector describing the first type
+   * @param o2
+   *          object inspector describing the second type
+   * @return true if the given object inspectors represent the same types.
+   * @throws RuntimeException
+   *           if both inspectors share a category that is not PRIMITIVE,
+   *           LIST, MAP or STRUCT
+   */
+  public static boolean compareTypes(ObjectInspector o1, ObjectInspector o2) {
+    Category c1 = o1.getCategory();
+    Category c2 = o2.getCategory();
+
+    // Return false if categories are not equal
+    if (!c1.equals(c2)) {
+      return false;
+    }
+
+    // If both categories are primitive return the comparison of type names.
+    if (c1.equals(Category.PRIMITIVE)) {
+      return o1.getTypeName().equals(o2.getTypeName());
+    }
+
+    // If lists, recursively compare the list element types
+    if (c1.equals(Category.LIST)) {
+      ObjectInspector child1 =
+          ((ListObjectInspector) o1).getListElementObjectInspector();
+      ObjectInspector child2 =
+          ((ListObjectInspector) o2).getListElementObjectInspector();
+      return compareTypes(child1, child2);
+    }
+
+    // If maps, recursively compare the key and value types
+    if (c1.equals(Category.MAP)) {
+      MapObjectInspector mapOI1 = (MapObjectInspector) o1;
+      MapObjectInspector mapOI2 = (MapObjectInspector) o2;
+
+      // Short-circuit: value types are only inspected once the key types
+      // are known to match.
+      return compareTypes(mapOI1.getMapKeyObjectInspector(),
+          mapOI2.getMapKeyObjectInspector())
+          && compareTypes(mapOI1.getMapValueObjectInspector(),
+              mapOI2.getMapValueObjectInspector());
+    }
+
+    // If structs, recursively compare the fields
+    if (c1.equals(Category.STRUCT)) {
+      StructObjectInspector structOI1 = (StructObjectInspector) o1;
+      StructObjectInspector structOI2 = (StructObjectInspector) o2;
+
+      List<? extends StructField> childFieldsList1
+          = structOI1.getAllStructFieldRefs();
+      List<? extends StructField> childFieldsList2
+          = structOI2.getAllStructFieldRefs();
+
+      // Two structs without a field list are equal; a struct without a field
+      // list never matches one that has a list. Handling both cases here keeps
+      // the size() calls below from dereferencing null.
+      if (childFieldsList1 == null || childFieldsList2 == null) {
+        return childFieldsList1 == null && childFieldsList2 == null;
+      }
+
+      if (childFieldsList1.size() != childFieldsList2.size()) {
+        return false;
+      }
+
+      // Sizes are equal, so the two iterators advance in lock step.
+      Iterator<? extends StructField> it1 = childFieldsList1.iterator();
+      Iterator<? extends StructField> it2 = childFieldsList2.iterator();
+      while (it1.hasNext()) {
+        StructField field1 = it1.next();
+        StructField field2 = it2.next();
+
+        if (!compareTypes(field1.getFieldObjectInspector(),
+            field2.getFieldObjectInspector())) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    // Unknown category
+    throw new RuntimeException("Unknown category encountered: " + c1);
+  }
   private ObjectInspectorUtils() {
     // prevent instantiation
   }