diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java index a5d7c59..d544c48 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java @@ -42,6 +42,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; @@ -301,6 +302,17 @@ public class TableMapReduceUtil { initTableMapperJob(snapshotName, scan, mapper, outputKeyClass, outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class); + /* + * Disable blockcache for these jobs. Any BlockCache implementation based on direct memory + * will likely cause the map tasks to OOM when opening the region. This is done here instead + * of TableSnapshotRegionRecordReader in case an advanced user wants to override this + * behavior in their job. + * TODO: evaluate performance implications of running a scan with no cache. Could be we need + * to fall back onto an LruBlockCache instance with a specified size. Will be easier + * after HBASE-10403. + */ + job.getConfiguration().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f); + // We would need even more libraries that hbase-server depends on TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Counter.class); }