From 2ce1b36d883d2ad9da7731acf7def5ed0b7c1b60 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Wed, 13 Nov 2013 17:21:15 -0800 Subject: [PATCH] HBASE-8438 Extend bin/hbase to print a "minimal classpath" for use by other tools $ ./bin/hbase mapredcp --help Usage: hbase mapredcp [-Dtmpjars=...] Construct a CLASSPATH containing dependency jars required to run a mapreduce job. By default, includes any jars detected by TableMapReduceUtils. Provide additional entries by specifying a comma-separated list in tmpjars. $ ./bin/hbase mapredcp 2>/dev/null | tr ':' '\n' /Users/ndimiduk/.m2/repository/org/apache/zookeeper/zookeeper/3.4.5/zookeeper-3.4.5.jar /Users/ndimiduk/repos/hbase/hbase-common/target/hbase-common-0.96.0.jar /Users/ndimiduk/.m2/repository/org/cloudera/htrace/htrace-core/2.01/htrace-core-2.01.jar /Users/ndimiduk/repos/hbase/hbase-client/target/hbase-client-0.96.0.jar /Users/ndimiduk/.m2/repository/io/netty/netty/3.6.6.Final/netty-3.6.6.Final.jar /Users/ndimiduk/repos/hbase/hbase-protocol/target/hbase-protocol-0.96.0.jar /Users/ndimiduk/repos/hbase/hbase-hadoop-compat/target/hbase-hadoop-compat-0.96.0.jar /Users/ndimiduk/repos/hbase/hbase-server/target/hbase-server-0.96.0.jar /Users/ndimiduk/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar /Users/ndimiduk/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.1.0-beta/hadoop-mapreduce-client-core-2.1.0-beta.jar /Users/ndimiduk/.m2/repository/org/apache/hadoop/hadoop-common/2.1.0-beta/hadoop-common-2.1.0-beta.jar /Users/ndimiduk/.m2/repository/com/google/guava/guava/12.0.1/guava-12.0.1.jar --- bin/hbase | 4 +- bin/hbase.cmd | 7 ++- .../hadoop/hbase/mapreduce/TableMapReduceUtil.java | 29 +++++++++- .../util/MapreduceDependencyClasspathTool.java | 66 ++++++++++++++++++++++ 4 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java diff --git a/bin/hbase b/bin/hbase index 
1fcf0bc..261a685 100755 --- a/bin/hbase +++ b/bin/hbase @@ -92,6 +92,7 @@ if [ $# = 0 ]; then echo " thrift2 Run the HBase Thrift2 server" echo " clean Run the HBase clean up script" echo " classpath Dump hbase CLASSPATH" + echo " mapredcp Dump CLASSPATH entries required by mapreduce" echo " version Print the version" echo " CLASSNAME Run the class named CLASSNAME" exit 1 @@ -324,7 +325,8 @@ elif [ "$COMMAND" = "clean" ] ; then fi "$bin"/hbase-cleanup.sh --config ${HBASE_CONF_DIR} $@ exit $? - +elif [ "$COMMAND" = "mapredcp" ] ; then + CLASS='org.apache.hadoop.hbase.util.MapreduceDependencyClasspathTool' elif [ "$COMMAND" = "classpath" ] ; then echo $CLASSPATH exit 0 diff --git a/bin/hbase.cmd b/bin/hbase.cmd index 13442de..48da7ad 100644 --- a/bin/hbase.cmd +++ b/bin/hbase.cmd @@ -202,7 +202,7 @@ goto :MakeCmdArgsLoop set hbase-command-arguments=%_hbasearguments% @rem figure out which class to run -set corecommands=shell master regionserver thrift thrift2 rest avro hlog hbck hfile zookeeper zkcli upgrade +set corecommands=shell master regionserver thrift thrift2 rest avro hlog hbck hfile zookeeper zkcli upgrade mapredcp for %%i in ( %corecommands% ) do ( if "%hbase-command%"=="%%i" set corecommand=true ) @@ -380,6 +380,10 @@ goto :eof set CLASS=org.apache.hadoop.hbase.migration.UpgradeTo96 goto :eof +:mapredcp + set CLASS=org.apache.hadoop.hbase.util.MapreduceDependencyClasspathTool + goto :eof + :makeServiceXml set arguments=%* @echo ^ @@ -412,6 +416,7 @@ goto :eof echo thrift Run the HBase Thrift server echo thrift2 Run the HBase Thrift2 server echo classpath Dump hbase CLASSPATH + echo mapredcp Dump CLASSPATH entries required by mapreduce echo version Print the version echo CLASSNAME Run the class named CLASSNAME goto :eof diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java index 0d2b088..dc87071 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@@ -18,6 +18,7 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
@@ -99,7 +100,6 @@ public class TableMapReduceUtil {
         job, true);
   }
 
-
   /**
    * Use this before submitting a TableMap job. It will appropriately set up
    * the job.
@@ -558,6 +558,41 @@ public class TableMapReduceUtil {
   }
 
   /**
+   * Builds a classpath string from the "tmpjars" entries of {@code conf}, joined with the
+   * platform path separator. Also exposed to shell scripts via `bin/hbase mapredcp`.
+   * <p>
+   * Entries of the form {@code file:/path/to/file.jar} have their URI scheme removed; a colon
+   * at index 1 is kept as a drive letter. Duplicates collapse and entry order is unspecified.
+   *
+   * @param conf configuration whose "tmpjars" value lists the jars
+   * @return the jar paths joined by {@link File#pathSeparator}
+   * @throws IllegalArgumentException if {@code conf} is null or has no "tmpjars" entries
+   */
+  public static String buildDependencyClasspath(Configuration conf) {
+    if (conf == null) {
+      throw new IllegalArgumentException("Must provide a configuration object.");
+    }
+    Set<String> paths = new HashSet<String>(conf.getStringCollection("tmpjars"));
+    if (paths.isEmpty()) {
+      throw new IllegalArgumentException("Configuration contains no tmpjars.");
+    }
+    StringBuilder sb = new StringBuilder();
+    for (String s : paths) {
+      // entries can take the form 'file:/path/to/file.jar'; drop the scheme. A colon at
+      // index 1 is treated as a Windows drive letter ('C:/path/to/file.jar') and is kept.
+      int idx = s.indexOf(':');
+      if (idx > 1) {
+        s = s.substring(idx + 1);
+      }
+      if (sb.length() > 0) {
+        sb.append(File.pathSeparator);
+      }
+      sb.append(s);
+    }
+    return sb.toString();
+  }
+
+  /**
    * Add the HBase dependency jars as well as jars for any of the configured
    * job classes to the job configuration, so that JobClient will ship them
    * to the cluster and add them to the DistributedCache.
@@ -570,6 +593,7 @@ public class TableMapReduceUtil { org.apache.hadoop.hbase.protobuf.generated.ClientProtos.class, // hbase-protocol org.apache.hadoop.hbase.client.Put.class, // hbase-client org.apache.hadoop.hbase.CompatibilityFactory.class, // hbase-hadoop-compat + org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.class, // hbase-server // pull necessary dependencies org.apache.zookeeper.ZooKeeper.class, org.jboss.netty.channel.ChannelFactory.class, @@ -626,8 +650,7 @@ public class TableMapReduceUtil { } if (jars.isEmpty()) return; - conf.set("tmpjars", - StringUtils.arrayToString(jars.toArray(new String[0]))); + conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()]))); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java new file mode 100644 index 0000000..79cd7f3 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/MapreduceDependencyClasspathTool.java @@ -0,0 +1,66 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Command-line tool that prints the classpath of jars a mapreduce job depends on. Extra
+ * entries may be supplied as a comma-separated list of paths through -Dtmpjars.
+ */
+public class MapreduceDependencyClasspathTool implements Tool {
+
+  private Configuration configuration;
+
+  @Override
+  public void setConf(Configuration conf) {
+    configuration = conf;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return configuration;
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length == 0) {
+      Job job = new Job(getConf());
+      TableMapReduceUtil.addDependencyJars(job);
+      Configuration jobConf = job.getConfiguration();
+      System.out.println(TableMapReduceUtil.buildDependencyClasspath(jobConf));
+    } else {
+      System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
+      System.err.println(" Construct a CLASSPATH containing dependency jars required to run a mapreduce");
+      System.err.println(" job. By default, includes any jars detected by TableMapReduceUtils. Provide");
+      System.err.println(" additional entries by specifying a comma-separated list in tmpjars.");
+    }
+    return 0;
+  }
+
+  public static void main(String[] argv) throws Exception {
+    Configuration hbaseConf = HBaseConfiguration.create();
+    System.exit(ToolRunner.run(hbaseConf, new MapreduceDependencyClasspathTool(), argv));
+  }
+}
-- 
1.8.4.2