From 27006c9ba93fba01da2acd9873b706734c2b9f4d Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 25 Apr 2013 18:28:52 -0700 Subject: [PATCH] HBASE-8438 Extend bin/hbase to print a "minimal classpath" For tools like pig and hive, blindly appending the full output of `bin/hbase classpath` to their own CLASSPATH is excessive. They already build CLASSPATH entries for hadoop. All they need from us is the delta entries, the dependencies we require w/o hadoop and all of its transitive deps. This is also a kindness for Windows, where there's a shorter limit on the length of commandline arguments. See also HIVE-2055 for additional discussion. Example usage: $ ./bin/hbase classpath-min \ -n -e ".*thrift.*" "$(./bin/hbase classpath)" 2>/dev/null | tr : '\n' /Users/ndimiduk/.m2/repository/org/jamon/jamon-runtime/2.3.1/jamon-runtime-2.3.1.jar /Users/ndimiduk/repos/hbase/conf /Users/ndimiduk/.m2/repository/javax/activation/activation/1.1/activation-1.1.jar /Users/ndimiduk/.m2/repository/com/google/protobuf/protobuf-java/2.4.1/protobuf-java-2.4.1.jar /Library/Java/JavaVirtualMachines/1.6.0_37-b06-434.jdk/Contents/Home/lib/tools.jar /Users/ndimiduk/.m2/repository/com/github/stephenc/findbugs/findbugs-annotations/1.3.9-1/findbugs-annotations-1.3.9-1.jar /Users/ndimiduk/.m2/repository/org/apache/zookeeper/zookeeper/3.4.5/zookeeper-3.4.5.jar /Users/ndimiduk/.m2/repository/stax/stax-api/1.0.1/stax-api-1.0.1.jar /Users/ndimiduk/.m2/repository/org/codehaus/jettison/jettison/1.3.1/jettison-1.3.1.jar /Users/ndimiduk/.m2/repository/com/google/guava/guava/12.0.1/guava-12.0.1.jar /Users/ndimiduk/repos/hbase/hbase-client/target/classes /Users/ndimiduk/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.8.8/jackson-jaxrs-1.8.8.jar /Users/ndimiduk/.m2/repository/org/apache/httpcomponents/httpclient/4.1.3/httpclient-4.1.3.jar /Users/ndimiduk/.m2/repository/com/github/stephenc/high-scale-lib/high-scale-lib/1.1.1/high-scale-lib-1.1.1.jar
/Users/ndimiduk/.m2/repository/org/apache/ftpserver/ftpserver-deprecated/1.0.0-M2/ftpserver-deprecated-1.0.0-M2.jar
/Users/ndimiduk/.m2/repository/javax/xml/bind/jaxb-api/2.1/jaxb-api-2.1.jar
/Users/ndimiduk/.m2/repository/com/sun/xml/bind/jaxb-impl/2.2.3-1/jaxb-impl-2.2.3-1.jar
/Users/ndimiduk/.m2/repository/org/cloudera/htrace/htrace/1.50/htrace-1.50.jar
/Users/ndimiduk/.m2/repository/org/apache/ftpserver/ftplet-api/1.0.0/ftplet-api-1.0.0.jar
/Users/ndimiduk/.m2/repository/org/codehaus/jackson/jackson-xc/1.8.8/jackson-xc-1.8.8.jar
/Users/ndimiduk/repos/hbase/hbase-common/target/test-classes
/Users/ndimiduk/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar
/Users/ndimiduk/.m2/repository/io/netty/netty/3.5.9.Final/netty-3.5.9.Final.jar
/Users/ndimiduk/repos/hbase/hbase-server/target
/Users/ndimiduk/.m2/repository/org/apache/hadoop/hadoop-test/1.1.2/hadoop-test-1.1.2.jar
/Users/ndimiduk/.m2/repository/org/jruby/jruby-complete/1.6.8/jruby-complete-1.6.8.jar
---
 bin/hbase                                          |   7 +
 .../hadoop/hbase/util/MinimalClasspathTool.java    | 169 +++++++++++++++++++++
 .../hbase/util/TestMinimalClasspathTool.java       |  48 ++++++
 3 files changed, 224 insertions(+)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/MinimalClasspathTool.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMinimalClasspathTool.java

diff --git a/bin/hbase b/bin/hbase
index 7c1bfb4..43d197d 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -88,6 +88,7 @@ if [ $# = 0 ]; then
   echo ""
   echo "PACKAGE MANAGEMENT"
   echo " classpath dump hbase CLASSPATH"
+  echo " classpath-min dump a minified CLASSPATH"
   echo " version print the version"
   echo ""
   echo " or"
@@ -310,6 +311,12 @@ elif [ "$COMMAND" = "zookeeper" ] ; then
 elif [ "$COMMAND" = "classpath" ] ; then
   echo $CLASSPATH
   exit 0
+elif [ "$COMMAND" = "classpath-min" ] ; then
+  CLASS='org.apache.hadoop.hbase.util.MinimalClasspathTool'
+  # Quote the path: if HADOOP_IN_PATH is empty or unset, an unquoted "[ -f ]" is always true.
+  if [ -f "${HADOOP_IN_PATH}" ] ; then
+    HBASE_OPTS="$HBASE_OPTS -Dhadoop.classpath=$("$HADOOP_IN_PATH" classpath 2>/dev/null)"
+  fi
 elif [ "$COMMAND" = "version" ] ; then
   CLASS='org.apache.hadoop.hbase.util.VersionInfo'
 else
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MinimalClasspathTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MinimalClasspathTool.java
new file mode 100644
index 0000000..8d790a1
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MinimalClasspathTool.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/** Command-line tool that prunes and de-duplicates the entries of a PATH-like string. */
+@InterfaceAudience.Private
+public class MinimalClasspathTool {
+
+  protected static final Log LOG = LogFactory.getLog(MinimalClasspathTool.class);
+  private static final String CMD_SYNTAX = "MinimalClasspathTool [--no-hadoop] [-e]* CLASSPATH";
+  private static final String HELP =
+    "Minimal Classpath Tool will parse a PATH-like structure and remove"
+    + " duplicate entries. Two entries are considered duplicates when their"
+    + " basenames are identical. Optionally, specify a set of regular expression"
+    + " exclusions to apply to each path entry. Any entries which match any of"
+    + " the patterns will be omitted from the final PATH. Specifying --no-hadoop"
+    + " will also omit entries found in hadoop.classpath system property. Upon"
+    + " success, the final path is printed to stdout.";
+
+  /**
+   * Build exclusion regexes from the hadoop.classpath system property. A jar is matched by
+   * file name regardless of version, a directory only by its exact path. Names are quoted so
+   * that path characters such as '.' or '\' are never interpreted as regex syntax.
+   * @return one exclusion regex per non-wildcard hadoop.classpath entry.
+   * @throws IOException if an entry cannot be canonicalized.
+   */
+  private static String[] hadoopExcludes() throws IOException {
+    String hcp = System.getProperty("hadoop.classpath", "");
+    if (hcp.isEmpty()) {
+      LOG.error("System property hadoop.classpath is not set.");
+      System.exit(1);
+    }
+    List<String> ret = new ArrayList<String>();
+    for (String entry : hcp.split(File.pathSeparator)) {
+      // a wildcard names no single jar, and "*" as a basename yields an invalid regex.
+      if (entry.contains("*")) continue;
+      File f = new File(entry).getCanonicalFile();
+      // a directory is excluded by exact path only; anything fuzzier would drop too much.
+      String exclude = Pattern.quote(f.getPath());
+      if (!f.isDirectory()) {
+        // trim off the version number component, or the bare extension when unversioned
+        String basename = f.getName().replaceAll("-\\d.*", "").replaceAll("\\.jar$", "");
+        exclude = ".*" + Pattern.quote(File.separator + basename) + "(-\\d[^/\\\\]*)?\\.jar";
+      }
+      LOG.debug("adding exclusion: '" + exclude + "'");
+      ret.add(exclude);
+    }
+    LOG.debug("Added " + ret.size() + " exclusions from hadoop.classpath");
+    return ret.toArray(new String[ret.size()]);
+  }
+
+  /**
+   * Remove excluded and duplicate entries from a PATH-like string. Wildcard entries are
+   * dropped; the rest are canonicalized and dropped when they fully match an exclude regex.
+   * Of several entries sharing a basename only the first is kept. Surviving entries keep
+   * their input order, because classpath order decides which copy of a class is loaded.
+   * @param cpLike entries separated by {@link File#pathSeparator}.
+   * @param excludes Java regexes matched against each canonical path; may be null.
+   * @return the surviving entries, each followed by {@link File#pathSeparator}.
+   * @throws IOException if an entry cannot be canonicalized.
+   * @see #HELP
+   */
+  public static String minimize(String cpLike, String[] excludes) throws IOException {
+    // watch for null off the parser
+    if (null == excludes) excludes = new String[0];
+    List<Pattern> patterns = new ArrayList<Pattern>(excludes.length);
+    for (String exclude : excludes) patterns.add(Pattern.compile(exclude));
+
+    // LinkedHashMap (basename -> canonical path) iterates in first-seen order.
+    Map<String, String> deduped = new LinkedHashMap<String, String>();
+    ENTRIES: for (String raw : cpLike.split(File.pathSeparator)) {
+      // drop wild-cards on the floor for now, whether or not any excludes were given.
+      if (raw.contains("*")) {
+        LOG.debug("Entry '" + raw + "' is a wildcard. Skipping.");
+        continue;
+      }
+      String entry = new File(raw).getCanonicalPath();
+      for (Pattern p : patterns) {
+        // drop anything that matches an exclude pattern
+        if (p.matcher(entry).matches()) {
+          LOG.debug("Entry '" + entry + "' matches exclude pattern '" + p + "'. Skipping.");
+          continue ENTRIES;
+        }
+      }
+      String basename = new File(entry).getName();
+      if (!deduped.containsKey(basename)) deduped.put(basename, entry);
+    }
+
+    StringBuilder sb = new StringBuilder();
+    for (String entry : deduped.values()) sb.append(entry).append(File.pathSeparator);
+    return sb.toString();
+  }
+
+  /**
+   * Command-line entry point: prints the minimized classpath to stdout. Exits 0 after
+   * printing help and 1 on bad usage or any failure. See {@link #HELP} for the semantics.
+   * @param args options followed by exactly one PATH-like argument.
+   */
+  public static void main(String[] args) {
+    Options opts = new Options();
+    opts.addOption("h", "help", false, "Print help and exit.");
+    opts.addOption("n", "no-hadoop", false,
+      " When --no-hadoop is specified, any entries found in the system property "
+      + "$hadoop.classpath are also excluded.");
+    opts.addOption("e", "exclude", true,
+      "entries to exclude, matched against the full path. Accepts Java regex patterns.");
+    CommandLineParser parser = new BasicParser();
+    HelpFormatter help = new HelpFormatter();
+    try {
+      CommandLine cli = parser.parse(opts, args, true);
+      for (Option o : cli.getOptions()) {
+        LOG.debug(o.getOpt() + "='" + cli.hasOption(o.getOpt()) + "'");
+      }
+      if (cli.hasOption("h")) {
+        help.printHelp(CMD_SYNTAX, HELP, opts, "");
+        System.exit(0);
+      }
+      if (1 != cli.getArgs().length) {
+        help.printHelp(CMD_SYNTAX, HELP, opts, "");
+        System.exit(1);
+      }
+      String[] excludes = cli.getOptionValues("exclude");
+      excludes = excludes == null ? new String[0] : excludes;
+      if (cli.hasOption("n"))
+        excludes = (String[]) ArrayUtils.addAll(hadoopExcludes(), excludes);
+      System.out.println(minimize(cli.getArgs()[0], excludes));
+    } catch (Exception e) {
+      LOG.error(e.getMessage(), e);
+      help.printHelp(CMD_SYNTAX, opts);
+      System.exit(1);
+    }
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMinimalClasspathTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMinimalClasspathTool.java
new file mode 100644
index 0000000..279735c
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMinimalClasspathTool.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import static org.apache.hadoop.hbase.util.MinimalClasspathTool.minimize;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/** Unit tests for {@link MinimalClasspathTool#minimize(String, String[])}. */
+@Category(SmallTests.class)
+public class TestMinimalClasspathTool {
+
+  private static final String SEP = File.pathSeparator;
+
+  /** Entries sharing a basename collapse to the first one; excluded entries are dropped. */
+  @Test
+  public void testDupes() throws IOException {
+    // minimize() canonicalizes every entry, so the literal "/foo/bar" is only the right
+    // answer where that path is already canonical (not, for example, on Windows, where a
+    // drive letter is prepended); derive the expectation from the platform instead.
+    String fooBar = new File("/foo/bar").getCanonicalPath() + SEP;
+    assertEquals(fooBar, minimize("/foo/bar" + SEP + "/foo/bar", null));
+    assertEquals(fooBar, minimize("/foo/bar" + SEP + "/bub/bar", null));
+    assertEquals("", minimize("/foo/bar" + SEP + "/bub/bar", new String[] { ".*bar.*" }));
+  }
+}
-- 
1.8.1