diff --git a/bin/ext/UDFLint.sh b/bin/ext/UDFLint.sh
new file mode 100644
index 0000000..206e6f5
--- /dev/null
+++ b/bin/ext/UDFLint.sh
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Append this service's name to the space separated SERVICE_LIST.
+THISSERVICE=UDFLint
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+# Run the UDFLint Java tool through execHiveCmd, forwarding all arguments.
+UDFLint () {
+  CLASS=org.apache.hadoop.hive.ql.tools.UDFLint
+  execHiveCmd $CLASS "$@"
+}
+
+# Run the UDFLint Java tool with --help so that it prints its usage.
+UDFLint_help () {
+  CLASS=org.apache.hadoop.hive.ql.tools.UDFLint
+  execHiveCmd $CLASS "--help"
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/tools/UDFLint.java b/ql/src/java/org/apache/hadoop/hive/ql/tools/UDFLint.java
new file mode 100644
index 0000000..6cb5ee5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/tools/UDFLint.java
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Properties;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.jar.JarInputStream;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Command line tool that inspects UDF jars for classes that are likely to conflict with
+ * classes that can already be loaded locally.
+ *
+ * <p>For each jar it reports entries that live under a blacklisted package prefix and
+ * top-level classes that can also be loaded locally but whose bytes differ from the jar copy.
+ */
+public class UDFLint {
+
+  static final Log LOG = LogFactory.getLog(UDFLint.class);
+
+  /** Jar entry name prefixes that are reported as errors when found in a UDF jar. */
+  static final String[] blacklist = { "org/apache/hive/", "org/apache/hadoop/" };
+
+  /** File name suffix of compiled classes inside a jar. */
+  private static final String CLASS_SUFFIX = ".class";
+
+  /** Running total of jar entries inspected, summed over every jar processed so far. */
+  public static int numberOfObject = 0;
+
+  /**
+   * Entry point: parses the command line, validates every jar passed to {@code --jar} and
+   * logs the total number of errors and of inspected jar entries.
+   *
+   * @param args command line arguments, see {@link #createOptions()}
+   * @throws Exception if the arguments cannot be parsed or a jar cannot be read
+   */
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    Options opts = createOptions();
+    CommandLine cli = new GnuParser().parse(opts, args);
+
+    if (cli.hasOption('h') || !cli.hasOption("jar")) {
+      new HelpFormatter().printHelp("udflint", opts);
+      return;
+    }
+
+    // --hiveconf is declared in createOptions(); copy its property=value pairs into the
+    // configuration used for class lookups so that the option actually has an effect.
+    Properties overrides = cli.getOptionProperties("hiveconf");
+    for (String key : overrides.stringPropertyNames()) {
+      conf.set(key, overrides.getProperty(key));
+    }
+
+    boolean runVerb = cli.hasOption('v');
+    int numberOfFiles = 0;
+    int numberOfError = 0;
+    StringBuilder processed = new StringBuilder();
+
+    for (String sf : cli.getOptionValue("jar").split(",")) {
+      File f = new File(sf);
+      if (!f.exists()) {
+        LOG.error("UDFLint could not find " + f);
+        return;
+      }
+      numberOfFiles++;
+      if (runVerb) {
+        LOG.info("UDFLint is validating " + f);
+      }
+      // Accumulate: the final report covers every jar, not only the last one.
+      numberOfError += process(conf, f, runVerb);
+      if (runVerb) {
+        if (processed.length() > 0) {
+          processed.append(", ");
+        }
+        processed.append(f);
+        LOG.info(String.format(
+            "Processed %s Jar file of the following jars %s with %s of objects", numberOfFiles,
+            processed, numberOfObject));
+      }
+    }
+    LOG.info("The number of errors in the UDFs is " + numberOfError);
+    LOG.info("The number of objects validated in the UDFs is " + numberOfObject);
+  }
+
+  /**
+   * Validates a single jar.
+   *
+   * <p>Every entry increments {@link #numberOfObject}. A non-directory entry under a
+   * blacklisted prefix counts as one error, and so does a top-level class that {@code conf}
+   * can also load when the MD5 of its local class file differs from the MD5 of the jar entry.
+   *
+   * @param conf configuration used to look up local duplicates of the jar's classes
+   * @param file jar to inspect
+   * @param verbose whether to log every entry and every matching checksum
+   * @return number of errors found in {@code file}
+   * @throws IOException if the jar or a class file cannot be read
+   */
+  private static int process(Configuration conf, File file, boolean verbose) throws IOException {
+    int errorCount = 0;
+    try (JarFile jar = new JarFile(file)) {
+      for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements();) {
+        JarEntry entry = entries.nextElement();
+        numberOfObject++;
+        if (verbose) {
+          LOG.info("Processing " + entry.toString());
+        }
+        String name = entry.getName();
+        // Directory entries such as "org/apache/hadoop/" are not classes; do not count them.
+        if (!entry.isDirectory() && isBlacklisted(name)) {
+          LOG.warn("Found class from blacklisted package " + name);
+          errorCount++;
+        }
+        // Only top-level classes are compared; inner classes contain '$' in their name.
+        if (!name.endsWith(CLASS_SUFFIX) || name.contains("$")) {
+          continue;
+        }
+        // "a/b/C.class" -> "a.b.C"; the suffix must go or the lookup can never succeed.
+        String klass =
+            name.substring(0, name.length() - CLASS_SUFFIX.length()).replace('/', '.');
+        Class<?> localKlass = conf.getClassByNameOrNull(klass);
+        if (localKlass == null) {
+          continue;
+        }
+        LOG.info("Duplicate class found in classpath for " + klass);
+        // Hash the entry's bytes directly; they are a class file, not a nested jar.
+        String jarMd5;
+        try (InputStream jis = jar.getInputStream(entry)) {
+          jarMd5 = DigestUtils.md5Hex(jis);
+        }
+        String localMd5 = localChecksum(localKlass, name);
+        if (localMd5 == null) {
+          LOG.warn("Could not read the local class file for " + klass);
+        } else if (!jarMd5.equals(localMd5)) {
+          LOG.warn(String.format(
+              "Checksums for %s do not match between local (%s) and UDF jar (%s)", klass,
+              localMd5, jarMd5));
+          errorCount++;
+        } else if (verbose) {
+          LOG.info(String.format(
+              "Checksums for %s do match between local (%s) and UDF jar (%s)", klass, localMd5,
+              jarMd5));
+        }
+      }
+    }
+    return errorCount;
+  }
+
+  /**
+   * Tells whether a jar entry name starts with one of the {@link #blacklist} prefixes.
+   *
+   * @param name jar entry name, using '/' as separator
+   * @return {@code true} if the name starts with a blacklisted prefix
+   */
+  private static boolean isBlacklisted(String name) {
+    for (String bad : blacklist) {
+      if (name.startsWith(bad)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Computes the MD5 of the class file from which a locally available class can be read.
+   *
+   * @param klass class that was found locally
+   * @param resource resource name of its class file, e.g. {@code a/b/C.class}
+   * @return hex MD5 of the class file, or {@code null} if it cannot be located as a resource
+   * @throws IOException if reading the class file fails
+   */
+  private static String localChecksum(Class<?> klass, String resource) throws IOException {
+    // getClassLoader() returns null for classes loaded by the bootstrap loader.
+    ClassLoader loader = klass.getClassLoader();
+    try (InputStream lis = loader == null
+        ? ClassLoader.getSystemResourceAsStream(resource)
+        : loader.getResourceAsStream(resource)) {
+      return lis == null ? null : DigestUtils.md5Hex(lis);
+    }
+  }
+
+  /**
+   * Builds the command line options understood by the tool: {@code -h/--help}, {@code --jar},
+   * {@code --hiveconf property=value} and {@code -v/--verbose}.
+   *
+   * @return the option set
+   */
+  static Options createOptions() {
+    Options result = new Options();
+
+    result.addOption(OptionBuilder.withLongOpt("help").withDescription("print help message")
+        .create('h'));
+
+    result.addOption(OptionBuilder.withLongOpt("jar")
+        .withDescription("Comma separated list of jars to validate").hasArg().create());
+
+    result.addOption(OptionBuilder.withValueSeparator().hasArgs(2).withArgName("property=value")
+        .withLongOpt("hiveconf").withDescription("Use value for given property").create());
+
+    result.addOption(OptionBuilder.withLongOpt("verbose")
+        .withDescription("Verbose mode (Run the tool in debug mode)").create('v'));
+
+    return result;
+  }
+}