Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (revision 1329056)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (working copy)
@@ -1189,10 +1189,9 @@
 
     /**
      * Similar to {@link #nextBlock()} but checks block type, throws an
-     * exception if incorrect, and returns the data portion of the block as
-     * an input stream.
+     * exception if incorrect, and returns the HFile block.
      */
-    DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+    HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
   }
 
   /** A full-fledged reader with iteration ability. */
@@ -1290,14 +1289,14 @@
       }
 
       @Override
-      public DataInputStream nextBlockAsStream(BlockType blockType)
+      public HFileBlock nextBlockWithBlockType(BlockType blockType)
           throws IOException {
         HFileBlock blk = nextBlock();
         if (blk.getBlockType() != blockType) {
           throw new IOException("Expected block of type " + blockType
               + " but found " + blk.getBlockType());
         }
-        return blk.getByteStream();
+        return blk;
       }
     };
   }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (revision 1329056)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (working copy)
@@ -531,24 +531,43 @@
         }
       }
     }
+
+    /**
+     * Read in the root-level index from the given HFile block. Must match
+     * what was written into the root level by
+     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+     * offset that function returned.
+     *
+     * @param blk the HFile block
+     * @param numEntries the number of root-level index entries
+     * @return the block's data stream, positioned just past the root-level index
+     * @throws IOException
+     */
+    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+      DataInputStream in = blk.getByteStream();
+      readRootIndex(in, numEntries);
+      return in;
+    }
 
     /**
      * Read the root-level metadata of a multi-level block index. Based on
      * {@link #readRootIndex(DataInput, int)}, but also reads metadata
      * necessary to compute the mid-key in a multi-level index.
      *
-     * @param in the buffered or byte input stream to read from
+     * @param blk the HFile block
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readMultiLevelIndexRoot(DataInputStream in,
+    public void readMultiLevelIndexRoot(HFileBlock blk,
         final int numEntries) throws IOException {
-      readRootIndex(in, numEntries);
-      if (in.available() < MID_KEY_METADATA_SIZE) {
+      DataInputStream in = readRootIndex(blk, numEntries);
+      // After reading the root index, the checksum bytes have to be
+      // subtracted to know whether mid-key metadata is present.
+      int checkSumBytes = blk.totalChecksumBytes();
+      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
         // No mid-key metadata available.
        return;
       }
-
       midLeafBlockOffset = in.readLong();
       midLeafBlockOnDiskSize = in.readInt();
       midKeyEntry = in.readInt();
@@ -761,7 +780,7 @@
 
     if (LOG.isTraceEnabled()) {
       LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
-          + out.getPos() + ", " + rootChunk.getNumEntries()
+          + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
           + " root-level entries, " + totalNumEntries + " total entries, "
           + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
           " on-disk size, "
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (revision 1329056)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (working copy)
@@ -124,17 +124,17 @@
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());
 
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getMetaIndexCount());
 
     // File info
     fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (revision 1329056)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (working copy)
@@ -176,7 +176,7 @@
         Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
 
     indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
-        fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
 
     long prevOffset = -1;
     int i = 0;
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (revision 1329056)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (working copy)
@@ -37,8 +37,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@
 
   @Test
   public void testHFileFormatV2() throws IOException {
     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
-        "testHFileFormatV2");
+        "testHFileFormatV2");
+    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+    final int entryCount = 10000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+  }
+
+
+  @Test
+  public void testMidKeyInHFile() throws IOException {
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
"testMidKeyInHFile"); + Compression.Algorithm compressAlgo = Compression.Algorithm.NONE; + int entryCount = 50000; + writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true); + } - final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ; + private void writeDataAndReadFromHFile(Path hfilePath, + Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException { + HFileWriterV2 writer = (HFileWriterV2) new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf)) .withPath(fs, hfilePath) .withBlockSize(4096) - .withCompression(COMPRESS_ALGO) + .withCompression(compressAlgo) .withComparator(KeyValue.KEY_COMPARATOR) .create(); @@ -88,11 +103,10 @@ Random rand = new Random(9713312); // Just a fixed seed. - final int ENTRY_COUNT = 10000; List keys = new ArrayList(); List values = new ArrayList(); - for (int i = 0; i < ENTRY_COUNT; ++i) { + for (int i = 0; i < entryCount; ++i) { byte[] keyBytes = randomOrderedKey(rand, i); // A random-length random value. @@ -113,6 +127,7 @@ writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris")); writer.close(); + FSDataInputStream fsdis = fs.open(hfilePath); @@ -124,10 +139,10 @@ FixedFileTrailer.readFromStream(fsdis, fileSize); assertEquals(2, trailer.getMajorVersion()); - assertEquals(ENTRY_COUNT, trailer.getEntryCount()); + assertEquals(entryCount, trailer.getEntryCount()); HFileBlock.FSReader blockReader = - new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize); + new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize); // Comparator class name is stored in the trailer in version 2. RawComparator comparator = trailer.createComparator(); HFileBlockIndex.BlockIndexReader dataBlockIndexReader = @@ -143,16 +158,21 @@ // Data index. We also read statistics about the block index written after // the root level. dataBlockIndexReader.readMultiLevelIndexRoot( - blockIter.nextBlockAsStream(BlockType.ROOT_INDEX), + blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount()); - + + if (findMidKey) { + byte[] midkey = dataBlockIndexReader.midkey(); + assertNotNull("Midkey should not be null", midkey); + } + // Meta index. metaBlockIndexReader.readRootIndex( - blockIter.nextBlockAsStream(BlockType.ROOT_INDEX), + blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(), trailer.getMetaIndexCount()); // File info FileInfo fileInfo = new FileInfo(); - fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO)); + fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream()); byte [] keyValueFormatVersion = fileInfo.get( HFileWriterV2.KEY_VALUE_VERSION); boolean includeMemstoreTS = keyValueFormatVersion != null && @@ -200,7 +220,7 @@ } LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead=" + blocksRead); - assertEquals(ENTRY_COUNT, entriesRead); + assertEquals(entryCount, entriesRead); // Meta blocks. We can scan until the load-on-open data offset (which is // the root block index offset in version 2) because we are not testing @@ -226,6 +246,7 @@ fsdis.close(); } + // Static stuff used by various HFile v2 unit tests private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";