From 2e8c7fcf99d70dfbebbb5e25e03726a04e90cac6 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 14 Nov 2013 14:38:41 -0800 Subject: [PATCH] HBASE-9165 [mapreduce] Modularize building dependency jars Separate adding HBase and dependencies from adding other job dependencies, and expose it as a separate method that other projects can use (for PIG-3285, HIVE-2055). --- .../hadoop/hbase/mapred/TableMapReduceUtil.java | 7 ++-- .../hadoop/hbase/mapreduce/TableMapReduceUtil.java | 45 +++++++++++++++------- .../hadoop/hbase/mapreduce/TestTableMapReduce.java | 10 ++--- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java index 0c7d47d..13a3043 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java @@ -294,14 +294,13 @@ public class TableMapReduceUtil { } /** - * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(Job) + * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job) */ public static void addDependencyJars(JobConf job) throws IOException { + org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job); org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars( job, - org.apache.zookeeper.ZooKeeper.class, - com.google.common.base.Function.class, - com.google.protobuf.Message.class, + // when making changes here, consider also mapreduce.TableMapReduceUtil job.getMapOutputKeyClass(), job.getMapOutputValueClass(), job.getOutputKeyClass(), diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java index 954d0a1..4246e3d 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java +++ 
b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java @@ -552,25 +552,43 @@ public static void initCredentials(Job job) throws IOException { } /** + * Add HBase and its dependencies (only) to the job configuration. + *

+ * This is intended as a low-level API, facilitating code reuse between this + * class and its mapred counterpart. It is also of use to external tools that + * need to build a MapReduce job that interacts with HBase but want + * fine-grained control over the jars shipped to the cluster. + *

+ * @param conf The Configuration object to extend with dependencies. + * @see org.apache.hadoop.hbase.mapred.TableMapReduceUtil + * @see PIG-3285 + */ + public static void addHBaseDependencyJars(Configuration conf) throws IOException { + addDependencyJars(conf, + org.apache.zookeeper.ZooKeeper.class, + com.google.protobuf.Message.class, + com.google.common.collect.ImmutableSet.class, + org.apache.hadoop.hbase.util.Bytes.class); //one class from hbase.jar + } + + /** * Add the HBase dependency jars as well as jars for any of the configured * job classes to the job configuration, so that JobClient will ship them * to the cluster and add them to the DistributedCache. */ public static void addDependencyJars(Job job) throws IOException { + addHBaseDependencyJars(job.getConfiguration()); try { addDependencyJars(job.getConfiguration(), - org.apache.zookeeper.ZooKeeper.class, - com.google.protobuf.Message.class, - com.google.common.collect.ImmutableSet.class, - org.apache.hadoop.hbase.util.Bytes.class, //one class from hbase.jar - job.getMapOutputKeyClass(), - job.getMapOutputValueClass(), - job.getInputFormatClass(), - job.getOutputKeyClass(), - job.getOutputValueClass(), - job.getOutputFormatClass(), - job.getPartitionerClass(), - job.getCombinerClass()); + // when making changes here, consider also mapred.TableMapReduceUtil + job.getMapOutputKeyClass(), + job.getMapOutputValueClass(), + job.getInputFormatClass(), + job.getOutputKeyClass(), + job.getOutputValueClass(), + job.getOutputFormatClass(), + job.getPartitionerClass(), + job.getCombinerClass()); } catch (ClassNotFoundException e) { throw new IOException(e); } @@ -612,8 +630,7 @@ public static void initCredentials(Job job) throws IOException { } if (jars.isEmpty()) return; - conf.set("tmpjars", - StringUtils.arrayToString(jars.toArray(new String[0]))); + conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0]))); } /** diff --git 
a/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java b/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java index b351444..96ca6cf 100644 --- a/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java +++ b/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java @@ -263,19 +263,15 @@ public class TestTableMapReduce { /** * Test that we add tmpjars correctly including the ZK jar. */ + @Test public void testAddDependencyJars() throws Exception { Job job = new Job(); TableMapReduceUtil.addDependencyJars(job); String tmpjars = job.getConfiguration().get("tmpjars"); - System.err.println("tmpjars: " + tmpjars); + assertTrue(tmpjars.contains("hbase")); assertTrue(tmpjars.contains("zookeeper")); - assertFalse(tmpjars.contains("guava")); - - System.err.println("appending guava jar"); - TableMapReduceUtil.addDependencyJars(job.getConfiguration(), - com.google.common.base.Function.class); - tmpjars = job.getConfiguration().get("tmpjars"); + assertTrue(tmpjars.contains("protobuf")); assertTrue(tmpjars.contains("guava")); } -- 1.8.4.2