diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index d7a867b..2518c41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -56,7 +56,6 @@ import org.apache.hadoop.hive.ql.udf.UDFBase64; import org.apache.hadoop.hive.ql.udf.UDFBin; import org.apache.hadoop.hive.ql.udf.UDFCeil; -import org.apache.hadoop.hive.ql.udf.UDFConcat; import org.apache.hadoop.hive.ql.udf.UDFConv; import org.apache.hadoop.hive.ql.udf.UDFCos; import org.apache.hadoop.hive.ql.udf.UDFDate; @@ -80,7 +79,6 @@ import org.apache.hadoop.hive.ql.udf.UDFLog; import org.apache.hadoop.hive.ql.udf.UDFLog10; import org.apache.hadoop.hive.ql.udf.UDFLog2; -import org.apache.hadoop.hive.ql.udf.UDFLower; import org.apache.hadoop.hive.ql.udf.UDFLpad; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; @@ -129,7 +127,6 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.UDFUnbase64; import org.apache.hadoop.hive.ql.udf.UDFUnhex; -import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; @@ -167,7 +164,6 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; - /** * FunctionRegistry. */ @@ -178,26 +174,26 @@ /** * The mapping from expression function names to expression classes. */ - static Map mFunctions = Collections.synchronizedMap(new LinkedHashMap()); + static Map mFunctions = Collections + .synchronizedMap(new LinkedHashMap()); /* * PTF variables - * */ + */ public static final String LEAD_FUNC_NAME = "lead"; public static final String LAG_FUNC_NAME = "lag"; public static final String LAST_VALUE_FUNC_NAME = "last_value"; - public static final String WINDOWING_TABLE_FUNCTION = "windowingtablefunction"; public static final String NOOP_TABLE_FUNCTION = "noop"; public static final String NOOP_MAP_TABLE_FUNCTION = "noopwithmap"; - static Map windowFunctions = Collections.synchronizedMap(new LinkedHashMap()); - + static Map windowFunctions = Collections + .synchronizedMap(new LinkedHashMap()); static { - registerUDF("concat", UDFConcat.class, false); + registerGenericUDF("concat", GenericUDFConcat.class); registerUDF("substr", UDFSubstr.class, false); registerUDF("substring", UDFSubstr.class, false); registerUDF("space", UDFSpace.class, false); @@ -246,10 +242,10 @@ registerGenericUDF("encode", GenericUDFEncode.class); registerGenericUDF("decode", GenericUDFDecode.class); - registerUDF("upper", UDFUpper.class, false); - registerUDF("lower", UDFLower.class, false); - registerUDF("ucase", UDFUpper.class, false); - registerUDF("lcase", UDFLower.class, false); + registerGenericUDF("upper", GenericUDFUpper.class); + registerGenericUDF("lower", GenericUDFLower.class); + registerGenericUDF("ucase", GenericUDFUpper.class); + registerGenericUDF("lcase", GenericUDFLower.class); registerUDF("trim", UDFTrim.class, false); registerUDF("ltrim", UDFLTrim.class, false); registerUDF("rtrim", UDFRTrim.class, false); @@ -335,7 +331,6 @@ registerGenericUDF("ewah_bitmap_or", GenericUDFEWAHBitmapOr.class); registerGenericUDF("ewah_bitmap_empty", GenericUDFEWAHBitmapEmpty.class); - // Aliases for Java Class Names // These are used in getImplicitConvertUDFMethod registerUDF(serdeConstants.BOOLEAN_TYPE_NAME, UDFToBoolean.class, false, @@ -355,8 +350,7 @@ 
registerUDF(serdeConstants.STRING_TYPE_NAME, UDFToString.class, false, UDFToString.class.getSimpleName()); - registerGenericUDF(serdeConstants.DATE_TYPE_NAME, - GenericUDFToDate.class); + registerGenericUDF(serdeConstants.DATE_TYPE_NAME, GenericUDFToDate.class); registerGenericUDF(serdeConstants.TIMESTAMP_TYPE_NAME, GenericUDFTimestamp.class); registerGenericUDF(serdeConstants.BINARY_TYPE_NAME, @@ -392,11 +386,10 @@ registerGenericUDAF("ewah_bitmap", new GenericUDAFEWAHBitmap()); - registerGenericUDAF("compute_stats" , new GenericUDAFComputeStats()); + registerGenericUDAF("compute_stats", new GenericUDAFComputeStats()); registerUDAF("percentile", UDAFPercentile.class); - // Generic UDFs registerGenericUDF("reflect", GenericUDFReflect.class); registerGenericUDF("reflect2", GenericUDFReflect2.class); @@ -440,7 +433,7 @@ registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class); registerGenericUDTF("stack", GenericUDTFStack.class); - //PTF declarations + // PTF declarations registerGenericUDF(true, LEAD_FUNC_NAME, GenericUDFLead.class); registerGenericUDF(true, LAG_FUNC_NAME, GenericUDFLag.class); @@ -458,7 +451,8 @@ registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class); registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class); - registerTableFunction(WINDOWING_TABLE_FUNCTION, WindowingTableFunctionResolver.class); + registerTableFunction(WINDOWING_TABLE_FUNCTION, + WindowingTableFunctionResolver.class); registerTableFunction("npath", NPathResolver.class); } @@ -474,8 +468,8 @@ static void registerUDF(String functionName, Class UDFClass, public static void registerUDF(boolean isNative, String functionName, Class UDFClass, boolean isOperator) { - registerUDF(isNative, functionName, UDFClass, isOperator, functionName - .toLowerCase()); + registerUDF(isNative, functionName, UDFClass, isOperator, + functionName.toLowerCase()); } public static void registerUDF(String functionName, @@ -546,7 +540,7 @@ public static FunctionInfo getFunctionInfo(String functionName) { /** * Returns a set of registered function names. This is used for the CLI * command "SHOW FUNCTIONS;" - * + * * @return set of strings contains function names */ public static Set getFunctionNames() { @@ -557,7 +551,7 @@ public static FunctionInfo getFunctionInfo(String functionName) { * Returns a set of registered function names. This is used for the CLI * command "SHOW FUNCTIONS 'regular expression';" Returns an empty set when * the regular expression is not valid. - * + * * @param funcPatternStr * regular expression of the interested function names * @return set of strings contains function names @@ -580,7 +574,7 @@ public static FunctionInfo getFunctionInfo(String functionName) { /** * Returns the set of synonyms of the supplied function. - * + * * @param funcName * the name of the function * @return Set of synonyms for funcName @@ -610,8 +604,8 @@ public static FunctionInfo getFunctionInfo(String functionName) { // are common/convertible to one another. Probably better to rely on the // ordering explicitly defined here than to assume that the enum values // that were arbitrarily assigned in PrimitiveCategory work for our purposes. 
- static EnumMap numericTypes = - new EnumMap(PrimitiveCategory.class); + static EnumMap numericTypes = new EnumMap( + PrimitiveCategory.class); static List numericTypeList = new ArrayList(); static void registerNumericType(PrimitiveCategory primitiveCategory, int level) { @@ -642,37 +636,42 @@ static int getCommonLength(int aLen, int bLen) { } /** - * Given 2 TypeInfo types and the PrimitiveCategory selected as the common class between the two, - * return a TypeInfo corresponding to the common PrimitiveCategory, and with type qualifiers - * (if applicable) that match the 2 TypeInfo types. - * Examples: - * varchar(10), varchar(20), primitive category varchar => varchar(20) - * date, string, primitive category string => string - * @param a TypeInfo of the first type - * @param b TypeInfo of the second type - * @param typeCategory PrimitiveCategory of the designated common type between a and b - * @return TypeInfo represented by the primitive category, with any applicable type qualifiers. + * Given 2 TypeInfo types and the PrimitiveCategory selected as the common + * class between the two, return a TypeInfo corresponding to the common + * PrimitiveCategory, and with type qualifiers (if applicable) that match the + * 2 TypeInfo types. Examples: varchar(10), varchar(20), primitive category + * varchar => varchar(20) date, string, primitive category string => string + * + * @param a + * TypeInfo of the first type + * @param b + * TypeInfo of the second type + * @param typeCategory + * PrimitiveCategory of the designated common type between a and b + * @return TypeInfo represented by the primitive category, with any applicable + * type qualifiers. */ - public static TypeInfo getTypeInfoForPrimitiveCategory( - PrimitiveTypeInfo a, PrimitiveTypeInfo b, PrimitiveCategory typeCategory) { - // For types with parameters (like varchar), we need to determine the type parameters + public static TypeInfo getTypeInfoForPrimitiveCategory(PrimitiveTypeInfo a, + PrimitiveTypeInfo b, PrimitiveCategory typeCategory) { + // For types with parameters (like varchar), we need to determine the type + // parameters // that should be added to this type, based on the original 2 TypeInfos. switch (typeCategory) { - case VARCHAR: - int maxLength = getCommonLength( - TypeInfoUtils.getCharacterLengthForType(a), - TypeInfoUtils.getCharacterLengthForType(b)); - VarcharTypeParams varcharParams = new VarcharTypeParams(); - varcharParams.setLength(maxLength); - String typeName = - PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveCategory(typeCategory).typeName - + varcharParams.toString(); - return TypeInfoFactory.getPrimitiveTypeInfo(typeName); + case VARCHAR: + int maxLength = getCommonLength( + TypeInfoUtils.getCharacterLengthForType(a), + TypeInfoUtils.getCharacterLengthForType(b)); + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(maxLength); + String typeName = PrimitiveObjectInspectorUtils + .getTypeEntryFromPrimitiveCategory(typeCategory).typeName + + varcharParams.toString(); + return TypeInfoFactory.getPrimitiveTypeInfo(typeName); - default: - // Type doesn't require any qualifiers. - return TypeInfoFactory.getPrimitiveTypeInfo( - PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveCategory(typeCategory).typeName); + default: + // Type doesn't require any qualifiers. 
+ return TypeInfoFactory.getPrimitiveTypeInfo(PrimitiveObjectInspectorUtils + .getTypeEntryFromPrimitiveCategory(typeCategory).typeName); } } @@ -683,35 +682,43 @@ public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) { if (a.equals(b)) { return a; } - if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { + if (a.getCategory() != Category.PRIMITIVE + || b.getCategory() != Category.PRIMITIVE) { return null; } - PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); - PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); + PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory(); + PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory(); if (pcA == pcB) { // Same primitive category but different qualifiers. - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, pcA); } - PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA); - PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB); + PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcA); + PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcB); // handle string types properly - if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) { - return getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); + if (pgA == PrimitiveGrouping.STRING_GROUP + && pgB == PrimitiveGrouping.STRING_GROUP) { + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, PrimitiveCategory.STRING); } if (FunctionRegistry.implicitConvertable(a, b)) { - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, pcB); } if (FunctionRegistry.implicitConvertable(b, a)) { - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, pcA); } for (PrimitiveCategory t : numericTypeList) { if (FunctionRegistry.implicitConvertable(pcA, t) && FunctionRegistry.implicitConvertable(pcB, t)) { - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, t); } } @@ -721,10 +728,10 @@ public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) { /** * Find a common class that objects of both TypeInfo a and TypeInfo b can * convert to. This is used for comparing objects of type a and type b. - * + * * When we are comparing string and double, we will always convert both of * them to double and then compare. - * + * * @return null if no common class could be found. 
*/ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { @@ -732,31 +739,38 @@ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { if (a.equals(b)) { return a; } - if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { + if (a.getCategory() != Category.PRIMITIVE + || b.getCategory() != Category.PRIMITIVE) { return null; } - PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); - PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); + PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory(); + PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory(); if (pcA == pcB) { // Same primitive category but different qualifiers. // Rely on getTypeInfoForPrimitiveCategory() to sort out the type params. - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, pcA); } - PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA); - PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB); + PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcA); + PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcB); // handle string types properly - if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) { - // Compare as strings. Char comparison semantics may be different if/when implemented. - return getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); + if (pgA == PrimitiveGrouping.STRING_GROUP + && pgB == PrimitiveGrouping.STRING_GROUP) { + // Compare as strings. Char comparison semantics may be different if/when + // implemented. + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, PrimitiveCategory.STRING); } for (PrimitiveCategory t : numericTypeList) { if (FunctionRegistry.implicitConvertable(pcA, t) && FunctionRegistry.implicitConvertable(pcB, t)) { - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, t); } } @@ -766,27 +780,31 @@ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { /** * Find a common class that objects of both TypeInfo a and TypeInfo b can * convert to. This is used for places other than comparison. - * + * * The common class of string and double is string. - * + * * @return null if no common class could be found. 
*/ public static TypeInfo getCommonClass(TypeInfo a, TypeInfo b) { if (a.equals(b)) { return a; } - if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { + if (a.getCategory() != Category.PRIMITIVE + || b.getCategory() != Category.PRIMITIVE) { return null; } - PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); - PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); + PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory(); + PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory(); - PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA); - PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB); + PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcA); + PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(pcB); // handle string types properly - if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) { - return getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); + if (pgA == PrimitiveGrouping.STRING_GROUP + && pgB == PrimitiveGrouping.STRING_GROUP) { + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, PrimitiveCategory.STRING); } Integer ai = numericTypes.get(pcA); @@ -796,23 +814,29 @@ public static TypeInfo getCommonClass(TypeInfo a, TypeInfo b) { return null; } PrimitiveCategory pcCommon = (ai > bi) ? pcA : pcB; - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcCommon); + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, + (PrimitiveTypeInfo) b, pcCommon); } - public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCategory to) { + public static boolean implicitConvertable(PrimitiveCategory from, + PrimitiveCategory to) { if (from == to) { return true; } - PrimitiveGrouping fromPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(from); - PrimitiveGrouping toPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(to); + PrimitiveGrouping fromPg = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(from); + PrimitiveGrouping toPg = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(to); // Allow implicit String to Double conversion - if (fromPg == PrimitiveGrouping.STRING_GROUP && to == PrimitiveCategory.DOUBLE) { + if (fromPg == PrimitiveGrouping.STRING_GROUP + && to == PrimitiveCategory.DOUBLE) { return true; } // Allow implicit String to Decimal conversion - if (fromPg == PrimitiveGrouping.STRING_GROUP && to == PrimitiveCategory.DECIMAL) { + if (fromPg == PrimitiveGrouping.STRING_GROUP + && to == PrimitiveCategory.DECIMAL) { return true; } // Void can be converted to any type @@ -820,15 +844,18 @@ public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCateg return true; } // Allow implicit String to Date conversion - if (fromPg == PrimitiveGrouping.DATE_GROUP && toPg == PrimitiveGrouping.STRING_GROUP) { + if (fromPg == PrimitiveGrouping.DATE_GROUP + && toPg == PrimitiveGrouping.STRING_GROUP) { return true; } // Allow implicit Numeric to String conversion - if (fromPg == PrimitiveGrouping.NUMERIC_GROUP && toPg == PrimitiveGrouping.STRING_GROUP) { + if (fromPg == PrimitiveGrouping.NUMERIC_GROUP + && toPg == PrimitiveGrouping.STRING_GROUP) { return true; } // Allow implicit String to varchar conversion, and vice versa - if (fromPg == PrimitiveGrouping.STRING_GROUP && toPg == 
PrimitiveGrouping.STRING_GROUP) { + if (fromPg == PrimitiveGrouping.STRING_GROUP + && toPg == PrimitiveGrouping.STRING_GROUP) { return true; } @@ -855,19 +882,21 @@ public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { } // Reimplemented to use PrimitiveCategory rather than TypeInfo, because - // 2 TypeInfos from the same qualified type (varchar, decimal) should still be + // 2 TypeInfos from the same qualified type (varchar, decimal) should still + // be // seen as equivalent. - if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) { + if (from.getCategory() == Category.PRIMITIVE + && to.getCategory() == Category.PRIMITIVE) { return implicitConvertable( - ((PrimitiveTypeInfo)from).getPrimitiveCategory(), - ((PrimitiveTypeInfo)to).getPrimitiveCategory()); + ((PrimitiveTypeInfo) from).getPrimitiveCategory(), + ((PrimitiveTypeInfo) to).getPrimitiveCategory()); } return false; } /** * Get the GenericUDAF evaluator for the name and argumentClasses. - * + * * @param name * the name of the UDAF * @param argumentOIs @@ -893,12 +922,11 @@ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String name, args[ii] = argumentOIs.get(ii); } - GenericUDAFParameterInfo paramInfo = - new SimpleGenericUDAFParameterInfo( - args, isDistinct, isAllColumns); + GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo( + args, isDistinct, isAllColumns); if (udafResolver instanceof GenericUDAFResolver2) { - udafEvaluator = - ((GenericUDAFResolver2) udafResolver).getEvaluator(paramInfo); + udafEvaluator = ((GenericUDAFResolver2) udafResolver) + .getEvaluator(paramInfo); } else { udafEvaluator = udafResolver.getEvaluator(paramInfo.getParameters()); } @@ -911,17 +939,21 @@ public static GenericUDAFEvaluator getGenericWindowingEvaluator(String name, boolean isAllColumns) throws SemanticException { WindowFunctionInfo finfo = windowFunctions.get(name.toLowerCase()); - if (finfo == null) { return null;} - if ( !name.toLowerCase().equals(LEAD_FUNC_NAME) && - !name.toLowerCase().equals(LAG_FUNC_NAME) ) { - return getGenericUDAFEvaluator(name, argumentOIs, isDistinct, isAllColumns); + if (finfo == null) { + return null; + } + if (!name.toLowerCase().equals(LEAD_FUNC_NAME) + && !name.toLowerCase().equals(LAG_FUNC_NAME)) { + return getGenericUDAFEvaluator(name, argumentOIs, isDistinct, + isAllColumns); } // this must be lead/lag UDAF ObjectInspector args[] = new ObjectInspector[argumentOIs.size()]; - GenericUDAFResolver udafResolver = finfo.getfInfo().getGenericUDAFResolver(); + GenericUDAFResolver udafResolver = finfo.getfInfo() + .getGenericUDAFResolver(); GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo( - argumentOIs.toArray(args), isDistinct, isAllColumns); + argumentOIs.toArray(args), isDistinct, isAllColumns); return ((GenericUDAFResolver2) udafResolver).getEvaluator(paramInfo); } @@ -929,6 +961,7 @@ public static GenericUDAFEvaluator getGenericWindowingEvaluator(String name, * This method is shared between UDFRegistry and UDAFRegistry. methodName will * be "evaluate" for UDFRegistry, and "aggregate"/"evaluate"/"evaluatePartial" * for UDAFRegistry. 
+ * * @throws UDFArgumentException */ public static Method getMethodInternal(Class udfClass, @@ -975,10 +1008,11 @@ public static void registerUDAF(boolean isNative, String functionName, Class udafClass) { mFunctions.put(functionName.toLowerCase(), new FunctionInfo(isNative, functionName.toLowerCase(), new GenericUDAFBridge( - (UDAF) ReflectionUtils.newInstance(udafClass, null)))); + (UDAF) ReflectionUtils.newInstance(udafClass, null)))); } - public static void unregisterTemporaryUDF(String functionName) throws HiveException { + public static void unregisterTemporaryUDF(String functionName) + throws HiveException { FunctionInfo fi = mFunctions.get(functionName.toLowerCase()); if (fi != null) { if (!fi.isNative()) { @@ -1044,7 +1078,8 @@ public static Object invoke(Method m, Object thisObject, Object... arguments) public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) { if (argumentAccepted.equals(argumentPassed) - || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) { + || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, + argumentAccepted)) { // matches return 0; } @@ -1093,37 +1128,46 @@ public static int matchCost(TypeInfo argumentPassed, } /** - * Given a set of candidate methods and list of argument types, try to - * select the best candidate based on how close the passed argument types are - * to the candidate argument types. - * For a varchar argument, we would prefer evaluate(string) over evaluate(double). - * @param udfMethods list of candidate methods - * @param argumentsPassed list of argument types to match to the candidate methods + * Given a set of candidate methods and list of argument types, try to select + * the best candidate based on how close the passed argument types are to the + * candidate argument types. For a varchar argument, we would prefer + * evaluate(string) over evaluate(double). + * + * @param udfMethods + * list of candidate methods + * @param argumentsPassed + * list of argument types to match to the candidate methods */ - static void filterMethodsByTypeAffinity(List udfMethods, List argumentsPassed) { + static void filterMethodsByTypeAffinity(List udfMethods, + List argumentsPassed) { if (udfMethods.size() > 1) { - // Prefer methods with a closer signature based on the primitive grouping of each argument. + // Prefer methods with a closer signature based on the primitive grouping + // of each argument. // Score each method based on its similarity to the passed argument types. 
int currentScore = 0; int bestMatchScore = 0; Method bestMatch = null; - for (Method m: udfMethods) { + for (Method m : udfMethods) { currentScore = 0; - List argumentsAccepted = - TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size()); + List argumentsAccepted = TypeInfoUtils.getParameterTypeInfos( + m, argumentsPassed.size()); Iterator argsPassedIter = argumentsPassed.iterator(); for (TypeInfo acceptedType : argumentsAccepted) { - // Check the affinity of the argument passed in with the accepted argument, + // Check the affinity of the argument passed in with the accepted + // argument, // based on the PrimitiveGrouping TypeInfo passedType = argsPassedIter.next(); if (acceptedType.getCategory() == Category.PRIMITIVE && passedType.getCategory() == Category.PRIMITIVE) { - PrimitiveGrouping acceptedPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping( - ((PrimitiveTypeInfo) acceptedType).getPrimitiveCategory()); - PrimitiveGrouping passedPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping( - ((PrimitiveTypeInfo) passedType).getPrimitiveCategory()); + PrimitiveGrouping acceptedPg = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(((PrimitiveTypeInfo) acceptedType) + .getPrimitiveCategory()); + PrimitiveGrouping passedPg = PrimitiveObjectInspectorUtils + .getPrimitiveGrouping(((PrimitiveTypeInfo) passedType) + .getPrimitiveCategory()); if (acceptedPg == passedPg) { - // The passed argument matches somewhat closely with an accepted argument + // The passed argument matches somewhat closely with an accepted + // argument ++currentScore; } } @@ -1148,7 +1192,7 @@ static void filterMethodsByTypeAffinity(List udfMethods, List /** * Gets the closest matching method corresponding to the argument list from a * list of methods. - * + * * @param mlist * The list of methods to inspect. * @param exact @@ -1157,8 +1201,9 @@ static void filterMethodsByTypeAffinity(List udfMethods, List * The classes for the argument. * @return The matching method. */ - public static Method getMethodInternal(Class udfClass, List mlist, boolean exact, - List argumentsPassed) throws UDFArgumentException { + public static Method getMethodInternal(Class udfClass, List mlist, + boolean exact, List argumentsPassed) + throws UDFArgumentException { // result List udfMethods = new ArrayList(); @@ -1187,8 +1232,8 @@ public static Method getMethodInternal(Class udfClass, List mlist, bo } if (LOG.isDebugEnabled()) { LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " - + argumentsPassed + " accepted = " + argumentsAccepted + - " method = " + m); + + argumentsPassed + " accepted = " + argumentsAccepted + + " method = " + m); } if (match) { // Always choose the function with least implicit conversions. @@ -1217,7 +1262,8 @@ public static Method getMethodInternal(Class udfClass, List mlist, bo } if (udfMethods.size() > 1) { - // First try selecting methods based on the type affinity of the arguments passed + // First try selecting methods based on the type affinity of the arguments + // passed // to the candidate method arguments. 
filterMethodsByTypeAffinity(udfMethods, argumentsPassed); } @@ -1231,10 +1277,11 @@ public static Method getMethodInternal(Class udfClass, List mlist, bo Method candidate = null; List referenceArguments = null; - for (Method m: udfMethods) { + for (Method m : udfMethods) { int maxNumericType = 0; - List argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size()); + List argumentsAccepted = TypeInfoUtils.getParameterTypeInfos( + m, argumentsPassed.size()); if (referenceArguments == null) { // keep the arguments for reference - we want all the non-numeric @@ -1244,19 +1291,21 @@ public static Method getMethodInternal(Class udfClass, List mlist, bo Iterator referenceIterator = referenceArguments.iterator(); - for (TypeInfo accepted: argumentsAccepted) { + for (TypeInfo accepted : argumentsAccepted) { TypeInfo reference = referenceIterator.next(); boolean acceptedIsPrimitive = false; PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN; if (accepted.getCategory() == Category.PRIMITIVE) { acceptedIsPrimitive = true; - acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory(); + acceptedPrimCat = ((PrimitiveTypeInfo) accepted) + .getPrimitiveCategory(); } if (acceptedIsPrimitive && numericTypes.containsKey(acceptedPrimCat)) { // We're looking for the udf with the smallest maximum numeric type. int typeValue = numericTypes.get(acceptedPrimCat); - maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType; + maxNumericType = typeValue > maxNumericType ? typeValue + : maxNumericType; } else if (!accepted.equals(reference)) { // There are non-numeric arguments that don't match from one UDF to // another. We give up at this point. @@ -1310,29 +1359,33 @@ public static GenericUDF cloneGenericUDF(GenericUDF genericUDF) { GenericUDF clonedUDF = null; if (genericUDF instanceof GenericUDFBridge) { GenericUDFBridge bridge = (GenericUDFBridge) genericUDF; - clonedUDF = new GenericUDFBridge(bridge.getUdfName(), bridge.isOperator(), - bridge.getUdfClassName()); + clonedUDF = new GenericUDFBridge(bridge.getUdfName(), + bridge.isOperator(), bridge.getUdfClassName()); } else if (genericUDF instanceof GenericUDFMacro) { GenericUDFMacro bridge = (GenericUDFMacro) genericUDF; clonedUDF = new GenericUDFMacro(bridge.getMacroName(), bridge.getBody(), bridge.getColNames(), bridge.getColTypes()); } else { - clonedUDF = (GenericUDF) ReflectionUtils - .newInstance(genericUDF.getClass(), null); + clonedUDF = (GenericUDF) ReflectionUtils.newInstance( + genericUDF.getClass(), null); } if (clonedUDF != null) { - // The original may have settable info that needs to be added to the new copy. + // The original may have settable info that needs to be added to the new + // copy. if (genericUDF instanceof SettableUDF) { try { - Object settableData = ((SettableUDF)genericUDF).getParams(); + Object settableData = ((SettableUDF) genericUDF).getParams(); if (settableData != null) { - ((SettableUDF)clonedUDF).setParams(settableData); + ((SettableUDF) clonedUDF).setParams(settableData); } } catch (UDFArgumentException err) { - // In theory this should not happen - if the original copy of the UDF had this - // data, we should be able to set the UDF copy with this same settableData. - LOG.error("Unable to add settable data to UDF " + genericUDF.getClass()); + // In theory this should not happen - if the original copy of the UDF + // had this + // data, we should be able to set the UDF copy with this same + // settableData. 
+ LOG.error("Unable to add settable data to UDF " + + genericUDF.getClass()); throw new IllegalArgumentException(err); } } @@ -1356,7 +1409,8 @@ public static GenericUDTF cloneGenericUDTF(GenericUDTF genericUDTF) { * Get the UDF class from an exprNodeDesc. Returns null if the exprNodeDesc * does not contain a UDF class. */ - private static Class getGenericUDFClassFromExprDesc(ExprNodeDesc desc) { + private static Class getGenericUDFClassFromExprDesc( + ExprNodeDesc desc) { if (!(desc instanceof ExprNodeGenericFuncDesc)) { return null; } @@ -1482,20 +1536,21 @@ private static boolean isOpCast(ExprNodeDesc desc) { if (!(desc instanceof ExprNodeGenericFuncDesc)) { return false; } - GenericUDF genericUDF = ((ExprNodeGenericFuncDesc)desc).getGenericUDF(); + GenericUDF genericUDF = ((ExprNodeGenericFuncDesc) desc).getGenericUDF(); Class udfClass; if (genericUDF instanceof GenericUDFBridge) { - udfClass = ((GenericUDFBridge)genericUDF).getUdfClass(); + udfClass = ((GenericUDFBridge) genericUDF).getUdfClass(); } else { udfClass = genericUDF.getClass(); } - return udfClass == UDFToBoolean.class || udfClass == UDFToByte.class || - udfClass == UDFToDouble.class || udfClass == UDFToFloat.class || - udfClass == UDFToInteger.class || udfClass == UDFToLong.class || - udfClass == UDFToShort.class || udfClass == UDFToString.class || - udfClass == GenericUDFToVarchar.class || - udfClass == GenericUDFTimestamp.class || udfClass == GenericUDFToBinary.class || - udfClass == GenericUDFToDate.class; + return udfClass == UDFToBoolean.class || udfClass == UDFToByte.class + || udfClass == UDFToDouble.class || udfClass == UDFToFloat.class + || udfClass == UDFToInteger.class || udfClass == UDFToLong.class + || udfClass == UDFToShort.class || udfClass == UDFToString.class + || udfClass == GenericUDFToVarchar.class + || udfClass == GenericUDFTimestamp.class + || udfClass == GenericUDFToBinary.class + || udfClass == GenericUDFToDate.class; } /** @@ -1508,36 +1563,37 @@ public static boolean isOpPreserveInputName(ExprNodeDesc desc) { /** * Registers the appropriate kind of temporary function based on a class's * type. 
- * - * @param functionName name under which to register function - * - * @param udfClass class implementing UD[A|T]F - * - * @return true if udfClass's type was recognized (so registration - * succeeded); false otherwise + * + * @param functionName + * name under which to register function + * + * @param udfClass + * class implementing UD[A|T]F + * + * @return true if udfClass's type was recognized (so registration succeeded); + * false otherwise */ - public static boolean registerTemporaryFunction( - String functionName, Class udfClass) { + public static boolean registerTemporaryFunction(String functionName, + Class udfClass) { if (UDF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryUDF( - functionName, (Class) udfClass, false); + FunctionRegistry.registerTemporaryUDF(functionName, + (Class) udfClass, false); } else if (GenericUDF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDF( - functionName, (Class) udfClass); + FunctionRegistry.registerTemporaryGenericUDF(functionName, + (Class) udfClass); } else if (GenericUDTF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDTF( - functionName, (Class) udfClass); + FunctionRegistry.registerTemporaryGenericUDTF(functionName, + (Class) udfClass); } else if (UDAF.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryUDAF( - functionName, (Class) udfClass); + FunctionRegistry.registerTemporaryUDAF(functionName, + (Class) udfClass); } else if (GenericUDAFResolver.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTemporaryGenericUDAF( - functionName, (GenericUDAFResolver) - ReflectionUtils.newInstance(udfClass, null)); - } else if(TableFunctionResolver.class.isAssignableFrom(udfClass)) { - FunctionRegistry.registerTableFunction( - functionName, (Class)udfClass); + FunctionRegistry.registerTemporaryGenericUDAF(functionName, + (GenericUDAFResolver) ReflectionUtils.newInstance(udfClass, null)); + } else if (TableFunctionResolver.class.isAssignableFrom(udfClass)) { + FunctionRegistry.registerTableFunction(functionName, + (Class) udfClass); } else { return false; } @@ -1547,35 +1603,39 @@ public static boolean registerTemporaryFunction( /** * Registers thae appropriate kind of temporary function based on a class's * type. - * - * @param macroName name under which to register the macro - * - * @param body the expression which the macro evaluates to - * - * @param colNames the names of the arguments to the macro - * - * @param colTypes the types of the arguments to the macro + * + * @param macroName + * name under which to register the macro + * + * @param body + * the expression which the macro evaluates to + * + * @param colNames + * the names of the arguments to the macro + * + * @param colTypes + * the types of the arguments to the macro */ - public static void registerTemporaryMacro( - String macroName, ExprNodeDesc body, - List colNames, List colTypes) { + public static void registerTemporaryMacro(String macroName, + ExprNodeDesc body, List colNames, List colTypes) { - FunctionInfo fI = new FunctionInfo(false, macroName, - new GenericUDFMacro(macroName, body, colNames, colTypes)); + FunctionInfo fI = new FunctionInfo(false, macroName, new GenericUDFMacro( + macroName, body, colNames, colTypes)); mFunctions.put(macroName.toLowerCase(), fI); } /** - * Registers Hive functions from a plugin jar, using metadata from - * the jar's META-INF/class-info.xml. 
- * - * @param jarLocation URL for reading jar file - * - * @param classLoader classloader to use for loading function classes + * Registers Hive functions from a plugin jar, using metadata from the jar's + * META-INF/class-info.xml. + * + * @param jarLocation + * URL for reading jar file + * + * @param classLoader + * classloader to use for loading function classes */ - public static void registerFunctionsFromPluginJar( - URL jarLocation, - ClassLoader classLoader) throws Exception { + public static void registerFunctionsFromPluginJar(URL jarLocation, + ClassLoader classLoader) throws Exception { URL url = new URL("jar:" + jarLocation + "!/META-INF/class-info.xml"); InputStream inputStream = null; @@ -1596,8 +1656,8 @@ public static void registerFunctionsFromPluginJar( Class udfClass = Class.forName(javaName, true, classLoader); boolean registered = registerTemporaryFunction(sqlName, udfClass); if (!registered) { - throw new RuntimeException( - "Class " + udfClass + " is not a Hive function implementation"); + throw new RuntimeException("Class " + udfClass + + " is not a Hive function implementation"); } } } finally { @@ -1609,133 +1669,127 @@ private FunctionRegistry() { // prevent instantiation } + // ---------PTF functions------------ - //---------PTF functions------------ - - public static void registerWindowFunction(String name, GenericUDAFResolver wFn) - { + public static void registerWindowFunction(String name, GenericUDAFResolver wFn) { registerWindowFunction(name, wFn, true); } /** - * Typically a WindowFunction is the same as a UDAF. The only exceptions are Lead & Lag UDAFs. These - * are not registered as regular UDAFs because - * - we plan to support Lead & Lag as UDFs (usable only within argument expressions - * of UDAFs when windowing is involved). Since mFunctions holds both UDFs and UDAFs we cannot - * add both FunctionInfos to mFunctions. - * We choose to only register UDFs in mFunctions. The implication of this is that Lead/Lag UDAFs + * Typically a WindowFunction is the same as a UDAF. The only exceptions are + * Lead & Lag UDAFs. These are not registered as regular UDAFs because - we + * plan to support Lead & Lag as UDFs (usable only within argument expressions + * of UDAFs when windowing is involved). Since mFunctions holds both UDFs and + * UDAFs we cannot add both FunctionInfos to mFunctions. We choose to only + * register UDFs in mFunctions. The implication of this is that Lead/Lag UDAFs * are only usable when windowing is involved. 
- * + * * @param name * @param wFn * @param registerAsUDAF */ - public static void registerWindowFunction(String name, GenericUDAFResolver wFn, boolean registerAsUDAF) - { + public static void registerWindowFunction(String name, + GenericUDAFResolver wFn, boolean registerAsUDAF) { FunctionInfo fInfo = null; if (registerAsUDAF) { registerGenericUDAF(true, name, wFn); fInfo = getFunctionInfo(name); - } - else { - fInfo = new FunctionInfo(true, - name.toLowerCase(), wFn); + } else { + fInfo = new FunctionInfo(true, name.toLowerCase(), wFn); } WindowFunctionInfo wInfo = new WindowFunctionInfo(fInfo); windowFunctions.put(name.toLowerCase(), wInfo); } - public static WindowFunctionInfo getWindowFunctionInfo(String name) - { + public static WindowFunctionInfo getWindowFunctionInfo(String name) { return windowFunctions.get(name.toLowerCase()); } /** * Both UDF and UDAF functions can imply order for analytical functions - * + * * @param name * name of function - * @return true if a GenericUDF or GenericUDAF exists for this name and implyOrder is true, false - * otherwise. + * @return true if a GenericUDF or GenericUDAF exists for this name and + * implyOrder is true, false otherwise. */ public static boolean impliesOrder(String functionName) { FunctionInfo info = mFunctions.get(functionName.toLowerCase()); if (info != null) { if (info.isGenericUDF()) { - UDFType type = info.getGenericUDF().getClass().getAnnotation(UDFType.class); + UDFType type = info.getGenericUDF().getClass() + .getAnnotation(UDFType.class); if (type != null) { return type.impliesOrder(); } } } - WindowFunctionInfo windowInfo = windowFunctions.get(functionName.toLowerCase()); + WindowFunctionInfo windowInfo = windowFunctions.get(functionName + .toLowerCase()); if (windowInfo != null) { return windowInfo.isImpliesOrder(); } return false; } - static void registerHiveUDAFsAsWindowFunctions() - { + static void registerHiveUDAFsAsWindowFunctions() { Set fNames = getFunctionNames(); - for(String fName : fNames) - { + for (String fName : fNames) { FunctionInfo fInfo = getFunctionInfo(fName); - if ( fInfo.isGenericUDAF()) - { + if (fInfo.isGenericUDAF()) { WindowFunctionInfo wInfo = new WindowFunctionInfo(fInfo); windowFunctions.put(fName, wInfo); } } } - public static boolean isTableFunction(String name) - { + public static boolean isTableFunction(String name) { FunctionInfo tFInfo = mFunctions.get(name.toLowerCase()); - return tFInfo != null && !tFInfo.isInternalTableFunction() && tFInfo.isTableFunction(); + return tFInfo != null && !tFInfo.isInternalTableFunction() + && tFInfo.isTableFunction(); } - public static TableFunctionResolver getTableFunctionResolver(String name) - { + public static TableFunctionResolver getTableFunctionResolver(String name) { FunctionInfo tfInfo = mFunctions.get(name.toLowerCase()); - if(tfInfo.isTableFunction()) { - return (TableFunctionResolver) ReflectionUtils.newInstance(tfInfo.getFunctionClass(), null); + if (tfInfo.isTableFunction()) { + return (TableFunctionResolver) ReflectionUtils.newInstance( + tfInfo.getFunctionClass(), null); } return null; } - public static TableFunctionResolver getWindowingTableFunction() - { + public static TableFunctionResolver getWindowingTableFunction() { return getTableFunctionResolver(WINDOWING_TABLE_FUNCTION); } - public static TableFunctionResolver getNoopTableFunction() - { + public static TableFunctionResolver getNoopTableFunction() { return getTableFunctionResolver(NOOP_TABLE_FUNCTION); } - public static void registerTableFunction(String name, Class tFnCls) - { + 
public static void registerTableFunction(String name, + Class tFnCls) { FunctionInfo tInfo = new FunctionInfo(name, tFnCls); mFunctions.put(name.toLowerCase(), tInfo); } /** * Use this to check if function is ranking function - * + * * @param name * name of a function - * @return true if function is a UDAF, has WindowFunctionDescription annotation and the annotations - * confirms a ranking function, false otherwise + * @return true if function is a UDAF, has WindowFunctionDescription + * annotation and the annotations confirms a ranking function, false + * otherwise */ - public static boolean isRankingFunction(String name){ + public static boolean isRankingFunction(String name) { FunctionInfo info = mFunctions.get(name.toLowerCase()); GenericUDAFResolver res = info.getGenericUDAFResolver(); - if (res != null){ - WindowFunctionDescription desc = res.getClass().getAnnotation(WindowFunctionDescription.class); - if (desc != null){ + if (res != null) { + WindowFunctionDescription desc = res.getClass().getAnnotation( + WindowFunctionDescription.class); + if (desc != null) { return desc.rankingFunction(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java deleted file mode 100755 index ed4d3ab..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFConcat.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.udf; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; - -/** - * UDFConcat. - * - */ -@Description(name = "concat", - value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or "+ - "_FUNC_(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data " + - " bin1, bin2, ... binN", - extended = "Returns NULL if any argument is NULL.\n" - + "Example:\n" - + " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" - + " 'abcdef'") -public class UDFConcat extends UDF { - - public UDFConcat() { - } - - private final Text text = new Text(); - - public Text evaluate(Text... args) { - text.clear(); - for (Text arg : args) { - if (arg == null) { - return null; - } - text.append(arg.getBytes(), 0, arg.getLength()); - } - return text; - } - - public BytesWritable evaluate(BytesWritable... bw){ - - int len = 0; - for(BytesWritable bytes : bw){ - if (bytes == null){ - return null; -} - len += bytes.getLength(); - } - - byte[] out = new byte[len]; - int curLen = 0; - // Need to iterate twice since BytesWritable doesn't support append. 
- for (BytesWritable bytes : bw){ - System.arraycopy(bytes.getBytes(), 0, out, curLen, bytes.getLength()); - curLen += bytes.getLength(); - } - return new BytesWritable(out); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java deleted file mode 100755 index f79cbdf..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLower.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.udf; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.io.Text; - -/** - * UDFLower. - * - */ -@Description(name = "lower,lcase", - value = "_FUNC_(str) - Returns str with all characters changed to lowercase", - extended = "Example:\n" - + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'facebook'") -public class UDFLower extends UDF { - private Text t = new Text(); - - public UDFLower() { - } - - public Text evaluate(Text s) { - if (s == null) { - return null; - } - t.set(s.toString().toLowerCase()); - return t; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java deleted file mode 100755 index 7dc682b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUpper.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.udf; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.io.Text; - -/** - * UDFUpper. 
- * - */ -@Description(name = "upper,ucase", - value = "_FUNC_(str) - Returns str with all characters changed to uppercase", - extended = "Example:\n" - + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'FACEBOOK'") -public class UDFUpper extends UDF { - - Text t = new Text(); - - public UDFUpper() { - } - - public Text evaluate(Text s) { - if (s == null) { - return null; - } - t.set(s.toString().toUpperCase()); - return t; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java new file mode 100644 index 0000000..0ce1825 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java @@ -0,0 +1,203 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.io.BytesWritable; + +/** + * GenericUDFConcat. + */ +@Description(name = "concat", +value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or "+ + "_FUNC_(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data " + + " bin1, bin2, ... 
binN", +extended = "Returns NULL if any argument is NULL.\n" ++ "Example:\n" ++ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" ++ " 'abcdef'") +public class GenericUDFConcat extends GenericUDF { + private transient ObjectInspector[] argumentOIs; + private transient StringConverter[] stringConverters; + private transient PrimitiveCategory returnType = PrimitiveCategory.STRING; + private transient BytesWritable[] bw; + private transient GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + + // Loop through all the inputs to determine the appropriate return type/length. + // Either all arguments are binary, or all columns are non-binary. + // Return type: + // All VARCHAR inputs: return VARCHAR + // All BINARY inputs: return BINARY + // Otherwise return STRING + argumentOIs = arguments; + + PrimitiveCategory currentCategory; + PrimitiveObjectInspector poi; + boolean fixedLengthReturnValue = true; + int returnLength = 0; // Only for char/varchar return types + for (int idx = 0; idx < arguments.length; ++idx) { + if (arguments[idx].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException("CONCAT only takes primitive arguments"); + } + poi = (PrimitiveObjectInspector)arguments[idx]; + currentCategory = poi.getPrimitiveCategory(); + if (idx == 0) { + returnType = currentCategory; + } + switch (currentCategory) { + case BINARY: + fixedLengthReturnValue = false; + if (returnType != currentCategory) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + break; + case VARCHAR: + if (returnType == PrimitiveCategory.BINARY) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + break; + default: + if (returnType == PrimitiveCategory.BINARY) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + returnType = PrimitiveCategory.STRING; + fixedLengthReturnValue = false; + break; + } + + // If all arguments are of known length then we can keep track of the max + // length of the return type. However if the return length exceeds the + // max length for the char/varchar, then the return type reverts to string. + if (fixedLengthReturnValue) { + returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi); + if (returnType == PrimitiveCategory.VARCHAR + && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) { + returnType = PrimitiveCategory.STRING; + fixedLengthReturnValue = false; + } + } + } + + if (returnType == PrimitiveCategory.BINARY) { + bw = new BytesWritable[arguments.length]; + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + // treat all inputs as string, the return value will be converted to the appropriate type. 
+ createStringConverters(); + returnHelper = new GenericUDFUtils.StringHelper(returnType); + switch (returnType) { + case STRING: + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + case VARCHAR: + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(returnLength); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(returnType, varcharParams)); + default: + throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType); + } + } + } + + private void createStringConverters() { + stringConverters = new StringConverter[argumentOIs.length]; + for (int idx = 0; idx < argumentOIs.length; ++idx) { + stringConverters[idx] = new StringConverter((PrimitiveObjectInspector) argumentOIs[idx]); + } + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (returnType == PrimitiveCategory.BINARY) { + return binaryEvaluate(arguments); + } else { + return returnHelper.setReturnValue(stringEvaluate(arguments)); + } + } + + public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException { + int len = 0; + for (int idx = 0; idx < arguments.length; ++idx) { + bw[idx] = ((BinaryObjectInspector)argumentOIs[idx]) + .getPrimitiveWritableObject(arguments[idx].get()); + if (bw[idx] == null){ + return null; + } + len += bw[idx].getLength(); + } + + byte[] out = new byte[len]; + int curLen = 0; + // Need to iterate twice since BytesWritable doesn't support append. + for (BytesWritable bytes : bw){ + System.arraycopy(bytes.getBytes(), 0, out, curLen, bytes.getLength()); + curLen += bytes.getLength(); + } + return new BytesWritable(out); + } + + public String stringEvaluate(DeferredObject[] arguments) throws HiveException { + StringBuilder sb = new StringBuilder(); + for (int idx = 0; idx < arguments.length; ++idx) { + String val = null; + if (arguments[idx] != null) { + val = (String) stringConverters[idx].convert(arguments[idx].get()); + } + if (val == null) { + return null; + } + sb.append(val); + } + return sb.toString(); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("concat("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(", "); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java new file mode 100644 index 0000000..366d9e6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * UDFLower. + * + */ +@Description(name = "lower,lcase", +value = "_FUNC_(str) - Returns str with all characters changed to lowercase", +extended = "Example:\n" ++ " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'facebook'") +public class GenericUDFLower extends GenericUDF { + private transient PrimitiveObjectInspector argumentOI; + private transient StringConverter stringConverter; + private transient PrimitiveCategory returnType = PrimitiveCategory.STRING; + private transient GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "LOWER requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException( + "LOWER only takes primitive types, got " + arguments[0].getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case VARCHAR: + // return type should have same length as the input.
+ returnType = inputType; + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength( + GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI)); + outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + argumentOI); + break; + default: + returnType = PrimitiveCategory.STRING; + outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + break; + } + returnHelper = new GenericUDFUtils.StringHelper(returnType); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + val = val.toLowerCase(); + return returnHelper.setReturnValue(val); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("lower("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(","); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java new file mode 100644 index 0000000..1bb164a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * UDFUpper. 
+ * + */ +@Description(name = "upper,ucase", + value = "_FUNC_(str) - Returns str with all characters changed to uppercase", + extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'FACEBOOK'") +public class GenericUDFUpper extends GenericUDF { + private transient PrimitiveObjectInspector argumentOI; + private transient StringConverter stringConverter; + private transient PrimitiveCategory returnType = PrimitiveCategory.STRING; + private transient GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "UPPER requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException( + "UPPER only takes primitive types, got " + arguments[0].getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case VARCHAR: + // return type should have same length as the input. + returnType = inputType; + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength( + GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI)); + outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + argumentOI); + break; + default: + returnType = PrimitiveCategory.STRING; + outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + break; + } + returnHelper = new GenericUDFUtils.StringHelper(returnType); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + val = val.toUpperCase(); + return returnHelper.setReturnValue(val); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("upper("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(","); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} diff --git ql/src/test/results/compiler/plan/groupby2.q.xml ql/src/test/results/compiler/plan/groupby2.q.xml index c5492af..fc3a37f 100755 --- ql/src/test/results/compiler/plan/groupby2.q.xml +++ ql/src/test/results/compiler/plan/groupby2.q.xml @@ -1532,14 +1532,7 @@ - - - org.apache.hadoop.hive.ql.udf.UDFConcat - - - concat - - + diff --git ql/src/test/results/compiler/plan/udf6.q.xml ql/src/test/results/compiler/plan/udf6.q.xml index ba4cf31..4b97cd6 100644 --- ql/src/test/results/compiler/plan/udf6.q.xml +++ ql/src/test/results/compiler/plan/udf6.q.xml @@ -385,14 +385,7 @@ - - - org.apache.hadoop.hive.ql.udf.UDFConcat - - - concat - - +
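
Reviewer note, not part of the patch: a minimal smoke-test sketch of how the new GenericUDFConcat is driven, the same way the expression evaluator uses it at runtime (initialize once with the argument ObjectInspectors, then evaluate each row with DeferredObjects). The class name ConcatSmokeTest and the use of javaStringObjectInspector inputs are illustrative assumptions; with STRING-only inputs initialize() takes the non-binary path and returns the writable string inspector, and a NULL argument yields a NULL result, matching the @Description.

// Illustrative sketch only; not part of this patch.
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ConcatSmokeTest {
  public static void main(String[] args) throws HiveException {
    GenericUDFConcat udf = new GenericUDFConcat();
    // Two STRING arguments: the non-binary code path, so initialize() returns the writable string OI.
    ObjectInspector[] inputOIs = {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector };
    udf.initialize(inputOIs);

    DeferredObject[] row = {
        new DeferredJavaObject("abc"), new DeferredJavaObject("def") };
    System.out.println(udf.evaluate(row));         // abcdef

    // Per the @Description, any NULL argument makes the whole result NULL.
    DeferredObject[] rowWithNull = {
        new DeferredJavaObject("abc"), new DeferredJavaObject(null) };
    System.out.println(udf.evaluate(rowWithNull)); // null
  }
}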