Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (revision 1329750)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (working copy)
@@ -1192,7 +1192,7 @@
      * exception if incorrect, and returns the data portion of the block as
      * an input stream.
      */
-    DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+    HFileBlock nextBlockAsStream(BlockType blockType) throws IOException;
   }
 
   /** A full-fledged reader with iteration ability. */
@@ -1290,14 +1290,14 @@
       }
 
       @Override
-      public DataInputStream nextBlockAsStream(BlockType blockType)
+      public HFileBlock nextBlockAsStream(BlockType blockType)
           throws IOException {
         HFileBlock blk = nextBlock();
         if (blk.getBlockType() != blockType) {
           throw new IOException("Expected block of type " + blockType
               + " but found " + blk.getBlockType());
         }
-        return blk.getByteStream();
+        return blk;
       }
     };
   }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (revision 1329750)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (working copy)
@@ -531,7 +531,36 @@
       }
     }
   }
+
+    /**
+     * Read in the root-level index from the given block. Must match what was
+     * written into the root level by
+     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+     * offset that function returned.
+     *
+     * @param blk the block holding the serialized root-level index
+     * @param numEntries the number of root-level index entries
+     * @throws IOException
+     */
+    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries)
+        throws IOException {
+      DataInputStream in = blk.getByteStream();
+      blockOffsets = new long[numEntries];
+      blockKeys = new byte[numEntries][];
+      blockDataSizes = new int[numEntries];
+      // If index size is zero, no index was written.
+      if (numEntries > 0) {
+        for (int i = 0; i < numEntries; ++i) {
+          long offset = in.readLong();
+          int dataSize = in.readInt();
+          byte[] key = Bytes.readByteArray(in);
+          add(key, offset, dataSize);
+        }
+      }
+      return in;
+    }
+
     /**
      * Read the root-level metadata of a multi-level block index. Based on
      * {@link #readRootIndex(DataInput, int)}, but also reads metadata
@@ -541,15 +570,18 @@
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readMultiLevelIndexRoot(DataInputStream in,
+    public void readMultiLevelIndexRoot(HFileBlock blk,
         final int numEntries) throws IOException {
-      readRootIndex(in, numEntries);
-      if (in.available() < MID_KEY_METADATA_SIZE) {
+      DataInputStream in = readRootIndex(blk, numEntries);
+      int numBytes = (int) ChecksumUtil.numBytes(blk
+          .getOnDiskDataSizeWithHeader(), blk.getBytesPerChecksum());
+      if ((in.available() - numBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
+      LOG.debug("The midLeafBlockOffset is " + midLeafBlockOffset);
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }
 
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (revision 1329750)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (working copy)
@@ -134,7 +134,7 @@
 
     // File info
     fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO).getByteStream());
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (revision 1329750)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (working copy)
@@ -37,8 +37,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.RawComparator;
@@ -72,9 +71,25 @@
   @Test
   public void testHFileFormatV2() throws IOException {
     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
-        "testHFileFormatV2");
+        "testHFileFormatV2");
+    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
+    final int ENTRY_COUNT = 10000;
+    writeDataAndReadFromHFile(hfilePath, COMPRESS_ALGO, ENTRY_COUNT, false);
+  }
+
+
+  @Test
+  public void testMidKeyInHFile() throws IOException {
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+        "testMidKeyInHFile");
+    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.NONE;
+    final int ENTRY_COUNT = 50000;
+    writeDataAndReadFromHFile(hfilePath, COMPRESS_ALGO, ENTRY_COUNT, true);
+  }
 
-    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
+  private void writeDataAndReadFromHFile(Path hfilePath,
+      Algorithm COMPRESS_ALGO, int ENTRY_COUNT, boolean findMidKey) throws IOException {
+
     HFileWriterV2 writer = (HFileWriterV2)
         new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
             .withPath(fs, hfilePath)
@@ -88,7 +103,6 @@
 
     Random rand = new Random(9713312); // Just a fixed seed.
 
-    final int ENTRY_COUNT = 10000;
     List<byte[]> keys = new ArrayList<byte[]>();
     List<byte[]> values = new ArrayList<byte[]>();
 
@@ -113,6 +127,7 @@
     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
 
     writer.close();
+
 
     FSDataInputStream fsdis = fs.open(hfilePath);
@@ -145,14 +160,19 @@
     dataBlockIndexReader.readMultiLevelIndexRoot(
         blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());
-
+
+    if (findMidKey) {
+      byte[] midkey = dataBlockIndexReader.midkey();
+      assertNotNull("Midkey should not be null", midkey);
+    }
+
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX).getByteStream(),
         trailer.getMetaIndexCount());
     // File info
     FileInfo fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO).getByteStream());
     byte [] keyValueFormatVersion = fileInfo.get(
         HFileWriterV2.KEY_VALUE_VERSION);
     boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -226,6 +246,7 @@
 
     fsdis.close();
   }
+
 
   // Static stuff used by various HFile v2 unit tests
   private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
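
Usage note (not part of the patch): the sketch below pulls the changed call sites together to show how the reworked API is expected to be driven. It assumes a BlockIterator, a FixedFileTrailer, and the two BlockIndexReader instances are already set up the way HFileReaderV2 and TestHFileWriterV2 set them up; the class and method names of the sketch itself (RootIndexLoadSketch, loadRootIndexes) are illustrative only.

import java.io.IOException;

import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

/**
 * Illustrative sketch only: mirrors the call sequence used by HFileReaderV2
 * and TestHFileWriterV2 after this change. The caller is assumed to have
 * opened the file and positioned a block iterator over the load-on-open
 * section, exactly as those classes do.
 */
class RootIndexLoadSketch {

  static byte[] loadRootIndexes(HFileBlock.BlockIterator blockIter,
      FixedFileTrailer trailer,
      HFileBlockIndex.BlockIndexReader dataBlockIndexReader,
      HFileBlockIndex.BlockIndexReader metaBlockIndexReader)
      throws IOException {

    // Data index root: the iterator now hands back the whole HFileBlock, so
    // the index reader can subtract the checksum bytes before deciding
    // whether mid-key metadata follows the root entries.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    // Meta index root: callers that only need the uncompressed payload
    // unwrap the block explicitly, as the patch does in HFileReaderV2 and
    // in the test.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX).getByteStream(),
        trailer.getMetaIndexCount());

    // With the checksum bytes excluded from in.available(), the mid-key
    // metadata is detected and midkey() can be answered from the root index.
    return dataBlockIndexReader.midkey();
  }
}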