Index: src/main/resources/hbase-default.xml
===================================================================
--- src/main/resources/hbase-default.xml (revision 996633)
+++ src/main/resources/hbase-default.xml (working copy)
@@ -404,11 +404,16 @@
- hfile.min.blocksize.size
+ hbase.mapreduce.hfileoutputformat.blocksize
65536
- Minimum store file block size. The smaller you make this, the
- bigger your index and the less you fetch on a random-access. Set size down
- if you have small cells and want faster random-access of individual cells.
+ The mapreduce HFileOutputFormat writes storefiles/hfiles.
+ This is the minimum hfile blocksize to emit. Usually in hbase, writing
+ hfiles, the blocksize is taken from the table schema (HColumnDescriptor)
+ but in the mapreduce outputformat context, we don't have access to the
+ schema so we get the blocksize from Configuration. The smaller you make
+ the blocksize, the bigger your index and the less you fetch on a
+ random-access. Set the blocksize down if you have small cells and want
+ faster random-access of individual cells.
Index: src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (revision 996633)
+++ src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (working copy)
@@ -73,7 +73,14 @@
public static final String COMPRESSION = "COMPRESSION";
public static final String BLOCKCACHE = "BLOCKCACHE";
+
+ /**
+ * Size of storefile/hfile 'blocks'. Default is {@link #DEFAULT_BLOCKSIZE}.
+ * Use smaller block sizes for faster random-access at expense of larger
+ * indices (more memory consumption).
+ */
public static final String BLOCKSIZE = "BLOCKSIZE";
+
public static final String LENGTH = "LENGTH";
public static final String TTL = "TTL";
public static final String BLOOMFILTER = "BLOOMFILTER";
@@ -108,8 +115,7 @@
public static final boolean DEFAULT_BLOCKCACHE = true;
/**
- * Default size of blocks in files store to the filesytem. Use smaller for
- * faster random-access at expense of larger indices (more memory consumption).
+ * Default size of blocks in files stored to the filesystem (hfiles).
*/
public static final int DEFAULT_BLOCKSIZE = HFile.DEFAULT_BLOCKSIZE;
@@ -222,7 +228,9 @@
* @param inMemory If true, column data should be kept in an HRegionServer's
* cache
* @param blockCacheEnabled If true, MapFile blocks should be cached
- * @param blocksize
+ * @param blocksize Block size to use when writing out storefiles. Use
+ * smaller blocksizes for faster random-access at expense of larger indices
+ * (more memory consumption). Default is usually 64k.
* @param timeToLive Time-to-live of cell contents, in seconds
* (use HConstants.FOREVER for unlimited TTL)
* @param bloomFilter Bloom filter type for this column
@@ -374,7 +382,7 @@
}
/**
- * @return Blocksize.
+ * @return The storefile/hfile blocksize for this column family.
*/
public synchronized int getBlocksize() {
if (this.blocksize == null) {
@@ -386,7 +394,8 @@
}
/**
- * @param s
+ * @param s Blocksize to use when writing out storefiles/hfiles on this
+ * column family.
*/
public void setBlocksize(int s) {
setValue(BLOCKSIZE, Integer.toString(s));
Index: src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java (revision 996633)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java (working copy)
@@ -75,7 +75,8 @@
final FileSystem fs = outputdir.getFileSystem(conf);
// These configs. are from hbase-*.xml
final long maxsize = conf.getLong("hbase.hregion.max.filesize", 268435456);
- final int blocksize = conf.getInt("hfile.min.blocksize.size", 65536);
+ final int blocksize =
+ conf.getInt("hbase.mapreduce.hfileoutputformat.blocksize", 65536);
// Invented config. Add to hbase-*.xml if other than default compression.
final String compression = conf.get("hfile.compression",
Compression.Algorithm.NONE.getName());