diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java index df063a4..c5d858f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -88,6 +89,12 @@ public class HFileOutputFormat extends FileOutputFormat splitKeys) throws IOException { + HFileOutputFormat2.configureIncrementalLoad(job, tableDesc, splitKeys, + HFileOutputFormat.class); + } + /** * Runs inside the task to deserialize column family to compression algorithm * map from the configuration. @@ -160,8 +167,8 @@ public class HFileOutputFormat extends FileOutputFormat splitKeys) + throws IOException { + configureIncrementalLoad(job, tableDesc, splitKeys, HFileOutputFormat2.class); } static void configureIncrementalLoad(Job job, HTable table, Class> cls) throws IOException { + // Use table's region boundaries for TOP split points. + configureIncrementalLoad(job, table.getTableDescriptor(), + getRegionStartKeys(table), cls); + } + + static void configureIncrementalLoad(Job job, HTableDescriptor tableDesc, + List splitKeys, Class> cls) + throws IOException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); @@ -381,23 +397,29 @@ public class HFileOutputFormat2 MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); - // Use table's region boundaries for TOP split points. - LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName())); - List startKeys = getRegionStartKeys(table); + List startKeys; + if (splitKeys == null) { + startKeys = new ArrayList(); + startKeys.add(new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY)); + } + else { + startKeys = splitKeys; + } + LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families - configureCompression(table, conf); - configureBloomType(table, conf); - configureBlockSize(table, conf); - configureDataBlockEncoding(table, conf); + configureCompression(tableDesc, conf); + configureBloomType(tableDesc, conf); + configureBlockSize(tableDesc, conf); + configureDataBlockEncoding(tableDesc, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); - LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + LOG.info("Incremental table " + tableDesc.getNameAsString() + " output configured."); } @@ -535,7 +557,7 @@ public class HFileOutputFormat2 * Serialize column family to compression algorithm map to configuration. * Invoked while configuring the MR job for incremental load. * - * @param table to read the properties from + * @param tableDesc to read the properties from * @param conf to persist serialized values into * @throws IOException * on failure to read column family descriptors @@ -544,9 +566,9 @@ public class HFileOutputFormat2 value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE") @VisibleForTesting static void configureCompression( - HTable table, Configuration conf) throws IOException { + HTableDescriptor tableDesc, Configuration conf) throws IOException { StringBuilder compressionConfigValue = new StringBuilder(); - HTableDescriptor tableDescriptor = table.getTableDescriptor(); + HTableDescriptor tableDescriptor = tableDesc; if(tableDescriptor == null){ // could happen with mock table instance return; @@ -571,16 +593,16 @@ public class HFileOutputFormat2 * Serialize column family to block size map to configuration. * Invoked while configuring the MR job for incremental load. * - * @param table to read the properties from + * @param tableDesc to read the properties from * @param conf to persist serialized values into * @throws IOException * on failure to read column family descriptors */ @VisibleForTesting static void configureBlockSize( - HTable table, Configuration conf) throws IOException { + HTableDescriptor tableDesc, Configuration conf) throws IOException { StringBuilder blockSizeConfigValue = new StringBuilder(); - HTableDescriptor tableDescriptor = table.getTableDescriptor(); + HTableDescriptor tableDescriptor = tableDesc; if (tableDescriptor == null) { // could happen with mock table instance return; @@ -605,15 +627,15 @@ public class HFileOutputFormat2 * Serialize column family to bloom type map to configuration. * Invoked while configuring the MR job for incremental load. * - * @param table to read the properties from + * @param tableDesc to read the properties from * @param conf to persist serialized values into * @throws IOException * on failure to read column family descriptors */ @VisibleForTesting static void configureBloomType( - HTable table, Configuration conf) throws IOException { - HTableDescriptor tableDescriptor = table.getTableDescriptor(); + HTableDescriptor tableDesc, Configuration conf) throws IOException { + HTableDescriptor tableDescriptor = tableDesc; if (tableDescriptor == null) { // could happen with mock table instance return; @@ -641,15 +663,15 @@ public class HFileOutputFormat2 * Serialize column family to data block encoding map to configuration. * Invoked while configuring the MR job for incremental load. * - * @param table to read the properties from + * @param tableDesc to read the properties from * @param conf to persist serialized values into * @throws IOException * on failure to read column family descriptors */ @VisibleForTesting - static void configureDataBlockEncoding(HTable table, + static void configureDataBlockEncoding(HTableDescriptor tableDesc, Configuration conf) throws IOException { - HTableDescriptor tableDescriptor = table.getTableDescriptor(); + HTableDescriptor tableDescriptor = tableDesc; if (tableDescriptor == null) { // could happen with mock table instance return; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java index f259352..ab60760 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java @@ -491,7 +491,7 @@ public class TestHFileOutputFormat { getMockColumnFamiliesForCompression(numCfs); HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForCompression(table, familyToCompression); - HFileOutputFormat.configureCompression(table, conf); + HFileOutputFormat.configureCompression(table.getTableDescriptor(), conf); // read back family specific compression setting from the configuration Map retrievedFamilyToCompressionMap = HFileOutputFormat @@ -562,7 +562,7 @@ public class TestHFileOutputFormat { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBloomType(table, familyToBloomType); - HFileOutputFormat.configureBloomType(table, conf); + HFileOutputFormat.configureBloomType(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration @@ -633,7 +633,7 @@ public class TestHFileOutputFormat { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBlockSize(table, familyToBlockSize); - HFileOutputFormat.configureBlockSize(table, conf); + HFileOutputFormat.configureBlockSize(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration @@ -709,7 +709,7 @@ public class TestHFileOutputFormat { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding); - HFileOutputFormat.configureDataBlockEncoding(table, conf); + HFileOutputFormat.configureDataBlockEncoding(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java index 0485ac5..ea482fb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java @@ -490,7 +490,7 @@ public class TestHFileOutputFormat2 { getMockColumnFamiliesForCompression(numCfs); HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForCompression(table, familyToCompression); - HFileOutputFormat2.configureCompression(table, conf); + HFileOutputFormat2.configureCompression(table.getTableDescriptor(), conf); // read back family specific compression setting from the configuration Map retrievedFamilyToCompressionMap = HFileOutputFormat2 @@ -562,7 +562,7 @@ public class TestHFileOutputFormat2 { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBloomType(table, familyToBloomType); - HFileOutputFormat2.configureBloomType(table, conf); + HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration @@ -633,7 +633,7 @@ public class TestHFileOutputFormat2 { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBlockSize(table, familyToBlockSize); - HFileOutputFormat2.configureBlockSize(table, conf); + HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration @@ -709,7 +709,7 @@ public class TestHFileOutputFormat2 { HTable table = Mockito.mock(HTable.class); setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding); - HFileOutputFormat2.configureDataBlockEncoding(table, conf); + HFileOutputFormat2.configureDataBlockEncoding(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration