diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a8bdefdad6..da3c2f9f0a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3468,7 +3468,11 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "This parameter enables a number of optimizations when running on blobstores:\n" +
         "(1) If hive.blobstore.use.blobstore.as.scratchdir is false, force the last Hive job to write to the blobstore.\n" +
         "This is a performance optimization that forces the final FileSinkOperator to write to the blobstore.\n" +
-        "See HIVE-15121 for details.");
+        "See HIVE-15121 for details."),
+
+    /* aux UDF section */
+
+    HIVE_AUX_UDF_PACKAGE_NAME_LIST("hive.aux.udf.package.name.list", "",
+        "Comma separated list of package names of user auxiliary UDF functions");

   public final String varname;
   public final String altName;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 9795f3ef98..f31daaf3d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -28,6 +28,7 @@
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@@ -132,6 +133,7 @@
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathLong;
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathShort;
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathString;
+import org.apache.hadoop.hive.ql.util.UDFRegister;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -190,6 +192,14 @@ private static final Registry
system = new Registry(true); static { + for (Map.Entry entry : UDFRegister.getUDFs().entrySet()) { + try { + Class clazz = Class.forName(entry.getValue()); + system.registerFunction(entry.getKey(), clazz); + } catch (Exception e) { + LOG.error("Failed to register the " + entry.getKey() + "function with the class " + entry.getValue()); + } + } system.registerGenericUDF("concat", GenericUDFConcat.class); system.registerUDF("substr", UDFSubstr.class, false); system.registerUDF("substring", UDFSubstr.class, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/ClassUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/util/ClassUtil.java new file mode 100644 index 0000000000..4fd3e2336b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/ClassUtil.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.net.JarURLConnection;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+/**
+ * Classpath scanning helper: finds every class below a package, whether the
+ * package lives in an exploded directory or inside a jar on the classpath.
+ */
+public class ClassUtil {
+  private static final Logger LOG = LoggerFactory.getLogger(ClassUtil.class);
+
+  private ClassUtil() {
+    // Utility class; no instances.
+  }
+
+  /**
+   * Returns all classes loadable from the given package, searching every
+   * classpath root (directories and jars) that contains it.
+   *
+   * @param packageName dotted package name, e.g. "com.example.udf"
+   * @return the discovered classes; never null, possibly empty
+   */
+  public static List<Class<?>> getClasses(String packageName) {
+    List<Class<?>> classes = new ArrayList<Class<?>>();
+    boolean recursive = true;
+    String packageDirName = packageName.replace('.', '/');
+    Enumeration<URL> dirs;
+    try {
+      dirs = Thread.currentThread().getContextClassLoader().getResources(packageDirName);
+      while (dirs.hasMoreElements()) {
+        URL url = dirs.nextElement();
+        String protocol = url.getProtocol();
+        if ("file".equals(protocol)) {
+          // getFile() is URL-encoded; decode so paths with spaces etc. resolve.
+          String filePath = URLDecoder.decode(url.getFile(), "UTF-8");
+          findAndAddClassesInPackageByFile(packageName, filePath, recursive, classes);
+        } else if ("jar".equals(protocol)) {
+          JarFile jar;
+          try {
+            jar = ((JarURLConnection) url.openConnection()).getJarFile();
+            Enumeration<JarEntry> entries = jar.entries();
+            while (entries.hasMoreElements()) {
+              JarEntry entry = entries.nextElement();
+              String name = entry.getName();
+              if (name.charAt(0) == '/') {
+                name = name.substring(1);
+              }
+              if (name.startsWith(packageDirName)) {
+                int idx = name.lastIndexOf('/');
+                if (idx != -1) {
+                  // Track the (sub-)package the current jar entry belongs to.
+                  packageName = name.substring(0, idx).replace('/', '.');
+                }
+                if ((idx != -1) || recursive) {
+                  if (name.endsWith(".class") && !entry.isDirectory()) {
+                    // Strip the trailing ".class" (6 chars) to get the simple name.
+                    String className = name.substring(packageName.length() + 1, name.length() - 6);
+                    try {
+                      classes.add(Class.forName(packageName + '.' + className));
+                    } catch (ClassNotFoundException e) {
+                      LOG.error("Failed to add class with the class " + className
+                          + " in package " + packageName, e);
+                    }
+                  }
+                }
+              }
+            }
+          } catch (IOException e) {
+            LOG.error("Failed to read jar entries from " + url, e);
+          }
+        }
+      }
+    } catch (IOException e) {
+      LOG.error("Failed to get classpath resources for package " + packageName, e);
+    }
+
+    return classes;
+  }
+
+  /**
+   * Recursively collects classes found under {@code packagePath} (a directory
+   * on the file system) into {@code classes}.
+   */
+  public static void findAndAddClassesInPackageByFile(String packageName, String packagePath,
+      final boolean recursive, List<Class<?>> classes) {
+    File dir = new File(packagePath);
+    if (!dir.exists() || !dir.isDirectory()) {
+      return;
+    }
+    File[] dirfiles = dir.listFiles(new FileFilter() {
+      public boolean accept(File file) {
+        return (recursive && file.isDirectory()) || (file.getName().endsWith(".class"));
+      }
+    });
+    if (dirfiles == null) {
+      // listFiles() returns null on an I/O error; nothing to add in that case.
+      return;
+    }
+    for (File file : dirfiles) {
+      if (file.isDirectory()) {
+        findAndAddClassesInPackageByFile(packageName + "." + file.getName(),
+            file.getAbsolutePath(), recursive, classes);
+      } else {
+        String className = file.getName().substring(0, file.getName().length() - 6);
+        try {
+          classes.add(Class.forName(packageName + '.' + className));
+        } catch (ClassNotFoundException e) {
+          LOG.error("Failed to add class with the class " + className
+              + " in package " + packageName, e);
+        }
+      }
+    }
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/UDFRegister.java b/ql/src/java/org/apache/hadoop/hive/ql/util/UDFRegister.java
new file mode 100644
index 0000000000..424846fdf9
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/UDFRegister.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
+ * The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Matcher;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Discovers auxiliary UDF classes in the packages configured through
+ * hive.aux.udf.package.name.list and maps each {@link Description} name to its
+ * fully qualified class name for registration.
+ */
+public class UDFRegister {
+  // Accepts dotted lower-case package names, e.g. "com.example.udf".
+  private static final String PACKAGE_NAME_REGEX = "^[a-z]+[.]([a-z]+[.]){0,}[a-z]+$|^[a-z]+$";
+  // Compile once; Pattern is immutable and thread-safe, compilation is not free.
+  private static final Pattern PACKAGE_NAME_PATTERN = Pattern.compile(PACKAGE_NAME_REGEX);
+  private static final Logger LOG = LoggerFactory.getLogger(UDFRegister.class);
+
+  /**
+   * Scans the configured auxiliary UDF packages.
+   *
+   * @return map of UDF name (from the Description annotation) to canonical
+   *         class name; empty when the configuration is unset or invalid
+   */
+  public static Map<String, String> getUDFs() {
+    Map<String, String> udfs = new HashMap<String, String>();
+    String packageNameStrs = new HiveConf().getVar(HiveConf.ConfVars.HIVE_AUX_UDF_PACKAGE_NAME_LIST);
+    if (!checkPackageNameStrs(packageNameStrs)) {
+      LOG.warn("User udf package name does not meet the package naming rules");
+      // Abort on an invalid or empty list: scanning with a blank package name
+      // would otherwise walk the whole classpath.
+      return udfs;
+    }
+    for (String packageName : packageNameStrs.split(",")) {
+      List<Class<?>> classes = ClassUtil.getClasses(packageName.trim());
+      if (classes != null && !classes.isEmpty()) {
+        for (Class<?> clas : classes) {
+          Description a = clas.getAnnotation(Description.class);
+          if (a != null) {
+            udfs.put(a.name(), clas.getCanonicalName());
+          }
+        }
+      }
+    }
+    return udfs;
+  }
+
+  /** Validates that every comma-separated entry is a well-formed package name. */
+  private static boolean checkPackageNameStrs(String packageNameStrs) {
+    if (packageNameStrs == null || packageNameStrs.trim().isEmpty()) {
+      return false;
+    }
+    for (String packageName : packageNameStrs.split(",")) {
+      if (!verifyPackageName(packageName.trim())) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** True when the whole string matches the package-name pattern. */
+  private static boolean verifyPackageName(String packageName) {
+    Matcher matcher = PACKAGE_NAME_PATTERN.matcher(packageName);
+    // The regex is fully anchored, so matches() states the intent directly.
+    return matcher.matches();
+  }
+}