Index: src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java	(revision 1024303)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java	(working copy)
@@ -70,6 +70,31 @@
       Class outputKeyClass,
       Class outputValueClass, Job job) throws IOException {
+    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+        job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will appropriately set up
+   * the job.
+   *
+   * @param table  The table name to read from.
+   * @param scan  The scan instance with the columns, time range etc.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job to adjust. Make sure the passed job is
+   *   carrying all necessary HBase configuration.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *   job classes via the distributed cache (tmpjars).
+   * @throws IOException When setting up the details fails.
+   */
+  public static void initTableMapperJob(String table, Scan scan,
+      Class mapper,
+      Class outputKeyClass,
+      Class outputValueClass, Job job,
+      boolean addDependencyJars)
+  throws IOException {
     job.setInputFormatClass(TableInputFormat.class);
     if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
     if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
@@ -77,7 +102,9 @@
     job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
     job.getConfiguration().set(TableInputFormat.SCAN,
       convertScanToString(scan));
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
@@ -167,7 +194,39 @@
     Class reducer, Job job,
     Class partitioner, String quorumAddress, String serverClass,
     String serverImpl) throws IOException {
+    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+        serverClass, serverImpl, true);
+  }
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job to adjust. Make sure the passed job is
+   *   carrying all necessary HBase configuration.
+   * @param partitioner  Partitioner to use. Pass null to use
+   *   default partitioner.
+   * @param quorumAddress Distant cluster to write to; default is null for
+   *   output to the cluster that is designated in hbase-site.xml.
+   *   Set this String to the zookeeper ensemble of an alternate remote cluster
+   *   when you would have the reduce write a cluster that is other than the
+   *   default; e.g. copying tables between clusters, the source would be
+   *   designated by hbase-site.xml and this param would have the
+   *   ensemble address of the remote cluster. The format to pass is particular.
+   *   Pass <hbase.zookeeper.quorum> ':' <ZOOKEEPER_ZNODE_PARENT>.
+   * @param serverClass redefined hbase.regionserver.class
+   * @param serverImpl redefined hbase.regionserver.impl
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *   job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReducerJob(String table,
+    Class reducer, Job job,
+    Class partitioner, String quorumAddress, String serverClass,
+    String serverImpl, boolean addDependencyJars) throws IOException {
+
     Configuration conf = job.getConfiguration();
     job.setOutputFormatClass(TableOutputFormat.class);
     if (reducer != null) job.setReducerClass(reducer);
@@ -198,7 +257,10 @@
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
     }
-    addDependencyJars(job);
+
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
Index: src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java	(revision 1024303)
+++ src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java	(working copy)
@@ -56,17 +56,39 @@
     Class mapper,
     Class outputKeyClass,
     Class outputValueClass, JobConf job) {
+    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
+  }
+  /**
+   * Use this before submitting a TableMap job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The table name to read from.
+   * @param columns  The columns to scan.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job configuration to adjust.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *   job classes via the distributed cache (tmpjars).
+   */
+  public static void initTableMapJob(String table, String columns,
+    Class mapper,
+    Class outputKeyClass,
+    Class outputValueClass, JobConf job, boolean addDependencyJars) {
+
     job.setInputFormat(TableInputFormat.class);
     job.setMapOutputValueClass(outputValueClass);
     job.setMapOutputKeyClass(outputKeyClass);
     job.setMapperClass(mapper);
     FileInputFormat.addInputPaths(job, table);
     job.set(TableInputFormat.COLUMN_LIST, columns);
-    try {
-      addDependencyJars(job);
-    } catch (IOException e) {
-      e.printStackTrace();
+    if (addDependencyJars) {
+      try {
+        addDependencyJars(job);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
     }
   }
 
@@ -99,6 +121,25 @@
   public static void initTableReduceJob(String table,
     Class reducer, JobConf job, Class partitioner)
   throws IOException {
+    initTableReduceJob(table, reducer, job, partitioner, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job configuration to adjust.
+   * @param partitioner  Partitioner to use. Pass null to use
+   *   default partitioner.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *   job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReduceJob(String table,
+    Class reducer, JobConf job, Class partitioner,
+    boolean addDependencyJars) throws IOException {
     job.setOutputFormat(TableOutputFormat.class);
     job.setReducerClass(reducer);
     job.set(TableOutputFormat.OUTPUT_TABLE, table);
@@ -114,7 +155,9 @@
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
    }
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
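Usage note (illustration only, not part of the patch): below is a minimal driver sketch showing how a client might call the new overloads with addDependencyJars=false, for example when the HBase jars are already on the cluster classpath or are shipped explicitly via -libjars or a bundled job jar. CopyRowsDriver, CopyMapper and the table names are hypothetical placeholders; IdentityTableReducer is the stock HBase reducer that writes the mapper's Puts to the output table unchanged.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;

public class CopyRowsDriver {

  // Placeholder mapper: re-emits every scanned row as a Put keyed by its row key.
  static class CopyMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
        throws IOException, InterruptedException {
      Put put = new Put(row.get());
      for (KeyValue kv : value.raw()) {
        put.add(kv);
      }
      context.write(row, put);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = new Job(conf, "copy-rows");
    job.setJarByClass(CopyRowsDriver.class);

    Scan scan = new Scan();
    scan.setCaching(500);  // scan rows in larger batches for the MR job

    // The new overloads added by this patch: the trailing 'false' skips the
    // automatic addDependencyJars(job) call, so nothing is pushed to tmpjars
    // and the submitter is responsible for shipping HBase and job classes.
    TableMapReduceUtil.initTableMapperJob("source_table", scan, CopyMapper.class,
        ImmutableBytesWritable.class, Put.class, job, false);
    TableMapReduceUtil.initTableReducerJob("target_table",
        IdentityTableReducer.class, job, null, null, null, null, false);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}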