diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 72f144df..c840187 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -23,7 +23,6 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -112,6 +111,7 @@ import com.google.common.base.Preconditions; public class HFileBlock implements Cacheable { private static final Log LOG = LogFactory.getLog(HFileBlock.class); + // Block header fields. /** Type of block. Header field 0. */ private BlockType blockType; @@ -139,7 +139,7 @@ public class HFileBlock implements Cacheable { * @see Writer#putHeader(byte[], int, int, int, int) */ private int onDiskDataSizeWithHeader; - + // End of Block Header Fields. /** * The in-memory representation of the hfile block. Can be on or offheap. Can be backed by @@ -153,7 +153,7 @@ public class HFileBlock implements Cacheable { *

We are using the ByteBuff type. ByteBuffer is not extensible yet we need to be able to have * a ByteBuffer-like API across multiple ByteBuffers reading from a cache such as BucketCache. * So, we have this ByteBuff type. Unfortunately, it is spread all about HFileBlock. Would be - * good if could be confined to cache-use only but hard-to-do. + * good if could be confined to cache-use only but hard-to-do. TODO. */ private ByteBuff buf; @@ -167,30 +167,28 @@ public class HFileBlock implements Cacheable { */ private long offset = UNSET; - private MemoryType memType = MemoryType.EXCLUSIVE; - /** - * The on-disk size of the next block, including the header and checksums if present, obtained by - * peeking into the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of the next block's - * header, or UNSET if unknown. + * The on-disk size of the next block, including the header and checksums if present. UNSET if unknown. + * This is an optimization. If UNSET, then we do an extra seek. * - * Blocks try to carry the size of the next block to read in this data member. They will even have - * this value when served from cache. Could save a seek in the case where we are iterating through - * a file and some of the blocks come from cache. If from cache, then having this info to hand - * will save us doing a seek to read the header so we can read the body of a block. - * TODO: see how effective this is at saving seeks. + *

Blocks try to carry the size of the next block to read in this data member. Usually we + * get block size from the hfile index but index does not have length of metadata blocks or + * of the index blocks themselves. */ private int nextBlockOnDiskSize = UNSET; /** + * Whether this file block is exclusive owner of the memory it occupies. + */ + private MemoryType memType = MemoryType.EXCLUSIVE; + + /** * On a checksum failure, do these many succeeding read requests using hdfs checksums before * auto-reenabling hbase checksum verification. */ static final int CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD = 3; private static int UNSET = -1; - public static final boolean FILL_HEADER = true; - public static final boolean DONT_FILL_HEADER = false; // How to get the estimate correctly? if it is a singleBB? public static final int MULTI_BYTE_BUFFER_HEAP_SIZE = @@ -198,13 +196,12 @@ public class HFileBlock implements Cacheable { /** * Space for metadata on a block that gets stored along with the block when we cache it. - * There are a few bytes stuck on the end of the HFileBlock that we pull in from HDFS (note, - * when we read from HDFS, we pull in an HFileBlock AND the header of the next block if one). + * There are a few bytes stuck on the end of the HFileBlock that we pull in from HDFS. * 8 bytes are offset of this block (long) in the file. Offset is important because * used when we remake the CacheKey when we return the block to cache when done. There is also * a flag on whether checksumming is being done by hbase or not. See class comment for note on * uncertain state of checksumming of blocks that come out of cache (should we or should we not?). - * Finally there 4 bytes to hold the length of the next block which can save a seek on occasion. + * Finally there are 4 bytes to hold the length of the next block which can save a seek on occasion. *

This EXTRA came in with original commit of the bucketcache, HBASE-7404. Was formerly * known as EXTRA_SERIALIZATION_SPACE. */ @@ -254,9 +251,8 @@ public class HFileBlock implements Cacheable { boolean usesChecksum = buf.get() == (byte)1; long offset = buf.getLong(); int nextBlockOnDiskSize = buf.getInt(); - HFileBlock hFileBlock = - new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null); - return hFileBlock; + return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, + null); } @Override @@ -308,7 +304,7 @@ public class HFileBlock implements Cacheable { * Copy constructor. Creates a shallow/deep copy of {@code that}'s buffer as per the boolean * param. */ - private HFileBlock(HFileBlock that,boolean bufCopy) { + private HFileBlock(HFileBlock that, boolean bufCopy) { this.blockType = that.blockType; this.onDiskSizeWithoutHeader = that.onDiskSizeWithoutHeader; this.uncompressedSizeWithoutHeader = that.uncompressedSizeWithoutHeader; @@ -340,20 +336,16 @@ public class HFileBlock implements Cacheable { * @param prevBlockOffset see {@link #prevBlockOffset} * @param b block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by * uncompressed data. - * @param fillHeader when true, write the first 4 header fields into passed buffer. * @param offset the file offset the block was read from * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader} * @param fileContext HFile meta data */ HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader, - long prevBlockOffset, ByteBuffer b, boolean fillHeader, long offset, + long prevBlockOffset, ByteBuffer b, long offset, final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader, HFileContext fileContext) { init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset, onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext); this.buf = new SingleByteBuff(b); - if (fillHeader) { - overwriteHeader(); - } this.buf.rewind(); } @@ -391,6 +383,7 @@ public class HFileBlock implements Cacheable { // Need to fix onDiskDataSizeWithHeader; there are not checksums after-block-data onDiskDataSizeWithHeader = onDiskSizeWithoutHeader + headerSize(usesHBaseChecksum); } + this.nextBlockOnDiskSize = nextBlockOnDiskSize; fileContext = fileContextBuilder.build(); assert usesHBaseChecksum == fileContext.isUseHBaseChecksum(); init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, @@ -419,31 +412,31 @@ public class HFileBlock implements Cacheable { } /** - * Parse total ondisk size including header and checksum. - * @param headerBuf Header ByteBuffer. Presumed exact size of header. - * @param verifyChecksum true if checksum verification is in use. - * @return Size of the block with header included. - */ - private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf, boolean verifyChecksum) { - return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + - headerSize(verifyChecksum); - } - - /** * @return the on-disk size of the next block (including the header size and any checksums if * present) read by peeking into the next block's header; use as a hint when doing - * a read of the next block when scanning or running over a file. + * a read of the next block when scanning or running over a file. Not always available. + * Block size usually gotten from hfile index. 
*/ public int getNextBlockOnDiskSize() { return nextBlockOnDiskSize; } + /** + * Parse total ondisk size including header and checksum. + * @param verifyChecksum true if checksum verification is in use. + * @return Size of the block with header included. + */ + private static int getOnDiskSizeWithHeader(final byte [] hdr, boolean verifyChecksum) { + return Bytes.toInt(hdr, Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + + headerSize(verifyChecksum); + } + public BlockType getBlockType() { return blockType; } /** @return get data block encoding id that was used to encode this block */ - public short getDataBlockEncodingId() { + short getDataBlockEncodingId() { if (blockType != BlockType.ENCODED_DATA) { throw new IllegalArgumentException("Querying encoder ID of a block " + "of type other than " + BlockType.ENCODED_DATA + ": " + blockType); @@ -481,23 +474,6 @@ public class HFileBlock implements Cacheable { } /** - * Rewinds {@code buf} and writes first 4 header fields. {@code buf} position - * is modified as side-effect. - */ - private void overwriteHeader() { - buf.rewind(); - blockType.write(buf); - buf.putInt(onDiskSizeWithoutHeader); - buf.putInt(uncompressedSizeWithoutHeader); - buf.putLong(prevBlockOffset); - if (this.fileContext.isUseHBaseChecksum()) { - buf.put(fileContext.getChecksumType().getCode()); - buf.putInt(fileContext.getBytesPerChecksum()); - buf.putInt(onDiskDataSizeWithHeader); - } - } - - /** * Returns a buffer that does not include the header or checksum. * * @return the buffer with header skipped and checksum omitted. @@ -687,7 +663,8 @@ public class HFileBlock implements Cacheable { } /** An additional sanity-check in case no compression or encryption is being used. */ - public void sanityCheckUncompressedSize() throws IOException { + @VisibleForTesting + void sanityCheckUncompressedSize() throws IOException { if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + totalChecksumBytes()) { throw new IOException("Using no compression but " + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", " @@ -777,43 +754,39 @@ public class HFileBlock implements Cacheable { } /** - * Read from an input stream at least necessaryLen and if possible, - * extraLen also if available. Analogous to + * Read from an input stream. Analogous to * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but uses - * positional read and specifies a number of "extra" bytes that would be - * desirable but not absolutely necessary to read. + * positional read. Takes an extra amount to read if it can. 
* * @param in the input stream to read from * @param position the position within the stream from which to start reading * @param buf the buffer to read into * @param bufOffset the destination offset in the buffer - * @param necessaryLen the number of bytes that are absolutely necessary to - * read - * @param extraLen the number of extra bytes that would be nice to read + * @param read the number of bytes that are absolutely necessary to read + * @param readExtra the number of extra bytes to read if possible * @return true if and only if extraLen is > 0 and reading those extra bytes * was successful * @throws IOException if failed to read the necessary bytes */ @VisibleForTesting - static boolean positionalReadWithExtra(FSDataInputStream in, - long position, byte[] buf, int bufOffset, int necessaryLen, int extraLen) - throws IOException { - int bytesRemaining = necessaryLen + extraLen; + static boolean positionalReadWithExtra(FSDataInputStream in, long position, byte[] buf, + int bufOffset, int read, int readExtra) + throws IOException { + int bytesRemaining = read + readExtra; int bytesRead = 0; - while (bytesRead < necessaryLen) { + while (bytesRead < read) { int ret = in.read(position, buf, bufOffset, bytesRemaining); if (ret < 0) { - throw new IOException("Premature EOF from inputStream (positional read " - + "returned " + ret + ", was trying to read " + necessaryLen - + " necessary bytes and " + extraLen + " extra bytes, " - + "successfully read " + bytesRead); + throw new IOException("Premature EOF (positional read returned " + + ret + ", was trying to read " + read + " necessary bytes and " + readExtra + + " extra bytes; successfully read=" + bytesRead); } position += ret; bufOffset += ret; bytesRemaining -= ret; bytesRead += ret; } - return bytesRead != necessaryLen && bytesRemaining <= 0; + return bytesRead != read && bytesRemaining <= 0; } /** @@ -1271,7 +1244,7 @@ * 0 value in bytesPerChecksum. * *

TODO: Should there be an option where a cache can ask that hbase preserve block - * checksums for checking after a block comes out of the cache? Otehrwise, cache is responsible + * checksums for checking after a block comes out of the cache? Otherwise, cache is responsible * for blocks being wholesome (ECC memory or if file-backed, it does checksumming). */ HFileBlock getBlockForCaching(CacheConfig cacheConf) { @@ -1289,10 +1262,11 @@ public class HFileBlock implements Cacheable { return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(), getUncompressedSizeWithoutHeader(), prevOffset, cacheConf.shouldCacheCompressed(blockType.getCategory())? - getOnDiskBufferWithHeader() : + getOnDiskBufferWithHeader(): getUncompressedBufferWithHeader(), - FILL_HEADER, startOffset, UNSET, - onDiskBlockBytesWithHeader.length + onDiskChecksum.length, newContext); + startOffset, UNSET, + onDiskBlockBytesWithHeader.length + onDiskChecksum.length, + newContext); } } @@ -1367,32 +1341,13 @@ public class HFileBlock implements Cacheable { } /** - * Data-structure to use caching the header of the NEXT block. Only works if next read - * that comes in here is next in sequence in this block. - * - * When we read, we read current block and the next blocks' header. We do this so we have - * the length of the next block to read if the hfile index is not available (rare). - * TODO: Review!! This trick of reading next blocks header is a pain, complicates our - * read path and I don't think it needed given it rare we don't have the block index - * (it is 'normally' present, gotten from the hfile index). FIX!!! - */ - private static class PrefetchedHeader { - long offset = -1; - byte [] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE]; - final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE); - - @Override - public String toString() { - return "offset=" + this.offset + ", header=" + Bytes.toStringBinary(header); - } - } - - /** - * Reads version 2 blocks from the filesystem. + * An HFileBlock Reader. + * Reads version 2+ blocks from the filesystem. */ static class FSReaderImpl implements FSReader { /** The file system stream of the underlying {@link HFile} that - * does or doesn't do checksum validations in the filesystem */ + * does or doesn't do checksum validations in the filesystem. + */ protected FSDataInputStreamWrapper streamWrapper; private HFileBlockDecodingContext encodedBlockDecodingCtx; @@ -1400,15 +1355,6 @@ public class HFileBlock implements Cacheable { /** Default context used when BlockType != {@link BlockType#ENCODED_DATA}. */ private final HFileBlockDefaultDecodingContext defaultDecodingCtx; - /** - * Cache of the NEXT header after this. Check it is indeed next blocks header - * before using it. TODO: Review. This overread into next block to fetch - * next blocks header seems unnecessary given we usually get the block size - * from the hfile index. Review! - */ - private AtomicReference prefetchedHeader = - new AtomicReference(new PrefetchedHeader()); - /** The size of the file we are reading from, or -1 if unknown. */ protected long fileSize; @@ -1433,10 +1379,11 @@ public class HFileBlock implements Cacheable { this.hfs = hfs; if (path != null) { this.pathName = path.toString(); + LOG.info("REMOVE " + this.pathName + " " + fileContext.getHFileName() + + " SAME?"); } this.fileContext = fileContext; this.hdrSize = headerSize(fileContext.isUseHBaseChecksum()); - this.streamWrapper = stream; // Older versions of HBase didn't support checksum. 
this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum()); @@ -1448,11 +1395,15 @@ public class HFileBlock implements Cacheable { * A constructor that reads files with the latest minor version. * This is used by unit tests only. */ + @VisibleForTesting FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException { this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext); } + /** + * An Iterator over HFile Blocks. + */ public BlockIterator blockRange(final long startOffset, final long endOffset) { final FSReader owner = this; // handle for inner class return new BlockIterator() { @@ -1491,9 +1442,9 @@ public class HFileBlock implements Cacheable { * @param dest destination buffer * @param destOffset offset into the destination buffer at where to put the bytes we read * @param size size of read - * @param peekIntoNextBlock whether to read the next block's on-disk size * @param fileOffset position in the stream to read at * @param pread whether we should do a positional read + * @param peekIntoNextBlock whether to read the next block's on-disk size * @param istream The input source of data * @return the on-disk size of the next block with header size included, or * -1 if it could not be determined; if not -1, the dest INCLUDES the @@ -1501,7 +1452,8 @@ public class HFileBlock implements Cacheable { * @throws IOException */ protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size, - boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException { + boolean peekIntoNextBlock, long fileOffset, boolean pread) + throws IOException { if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) { // We are asked to read the next block's header as well, but there is // not enough room in the array. @@ -1509,19 +1461,15 @@ public class HFileBlock implements Cacheable { hdrSize + " bytes of next header into a " + dest.length + "-byte array at offset " + destOffset); } - if (!pread && streamLock.tryLock()) { // Seek + read. Better for scanning. try { istream.seek(fileOffset); - long realOffset = istream.getPos(); if (realOffset != fileOffset) { - throw new IOException("Tried to seek to " + fileOffset + " to " - + "read " + size + " bytes, but pos=" + realOffset - + " after seek"); + throw new IOException("Tried to seek to " + fileOffset + " to read " + size + + " bytes, but pos=" + realOffset + " after seek"); } - if (!peekIntoNextBlock) { IOUtils.readFully(istream, dest, destOffset, size); return -1; @@ -1541,7 +1489,6 @@ public class HFileBlock implements Cacheable { return -1; } } - assert peekIntoNextBlock; return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize; } @@ -1642,7 +1589,7 @@ public class HFileBlock implements Cacheable { * is not right. * @throws IOException */ - private void verifyOnDiskSizeMatchesHeader(final int passedIn, final ByteBuffer headerBuf, + private void verifyOnDiskSizeMatchesHeader(final int passedIn, final byte [] headerBuf, final long offset, boolean verifyChecksum) throws IOException { // Assert size provided aligns with what is in the header @@ -1654,34 +1601,6 @@ public class HFileBlock implements Cacheable { } /** - * Check atomic reference cache for this block's header. Cache only good if next - * read coming through is next in sequence in the block. We read next block's - * header on the tail of reading the previous block to save a seek. 
Otherwise, - * we have to do a seek to read the header before we can pull in the block OR - * we have to backup the stream because we over-read (the next block's header). - * @see PrefetchedHeader - * @return The cached block header or null if not found. - * @see #cacheNextBlockHeader(long, byte[], int, int) - */ - private ByteBuffer getCachedHeader(final long offset) { - PrefetchedHeader ph = this.prefetchedHeader.get(); - return ph != null && ph.offset == offset? ph.buf: null; - } - - /** - * Save away the next blocks header in atomic reference. - * @see #getCachedHeader(long) - * @see PrefetchedHeader - */ - private void cacheNextBlockHeader(final long offset, - final byte [] header, final int headerOffset, final int headerLength) { - PrefetchedHeader ph = new PrefetchedHeader(); - ph.offset = offset; - System.arraycopy(header, headerOffset, ph.header, 0, headerLength); - this.prefetchedHeader.set(ph); - } - - /** * Reads a version 2 block. * * @param offset the offset in the stream to read at. Usually the @@ -1691,8 +1610,11 @@ public class HFileBlock implements Cacheable { * the first read of a new file (TODO: Fix! See HBASE-17072). Usually non-null gotten * from the file index. * @param pread whether to use a positional read - * @param verifyChecksum Whether to use HBase checksums. - * If HBase checksum is switched off, then use HDFS checksum. + * @param verifyChecksum Whether to use HBase checksums. Can flip on the stream as we read + * if we falter on hbase checksumming. In this case we'll fall back to hdfs checksums a + * while and then flip back to hbase checksumming again if we can. This flag then is not + * about whether we support hbase checksumming or not but whether to make use of them at + * this current time. * @return the HFileBlock or null if there is a HBase checksum mismatch */ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, @@ -1703,69 +1625,50 @@ public class HFileBlock implements Cacheable { + "block (onDiskSize=" + onDiskSizeWithHeaderL + ")"); } int onDiskSizeWithHeader = checkAndGetSizeAsInt(onDiskSizeWithHeaderL, hdrSize); - // Try and get cached header. Will serve us in rare case where onDiskSizeWithHeaderL is -1 - // and will save us having to seek the stream backwards to reread the header we - // read the last time through here. - ByteBuffer headerBuf = getCachedHeader(offset); if (LOG.isTraceEnabled()) { LOG.trace("Reading " + this.fileContext.getHFileName() + " at offset=" + offset + - ", pread=" + pread + ", verifyChecksum=" + verifyChecksum + ", cachedHeader=" + - headerBuf + ", onDiskSizeWithHeader=" + onDiskSizeWithHeader); + ", pread=" + pread + ", verifyChecksum=" + verifyChecksum + + ", onDiskSizeWithHeader=" + onDiskSizeWithHeader); } + // This checksum flag is different from verifyChecksum. It is whether we support + // checksums in this hfile, NOT whether we should do them or not. + boolean checksum = this.fileContext.isUseHBaseChecksum(); + byte [] hdr = null; if (onDiskSizeWithHeader <= 0) { - // We were not passed the block size. Need to get it from the header. If header was not in - // cache, need to seek to pull it in. This is costly and should happen very rarely. - // Currently happens on open of a hfile reader where we read the trailer blocks for - // indices. Otherwise, we are reading block sizes out of the hfile index. To check, - // enable TRACE in this file and you'll get an exception in a LOG every time we seek. - // See HBASE-17072 for more detail. 
- if (headerBuf == null) { - if (LOG.isTraceEnabled()) { - LOG.trace("Extra see to get block size!", new RuntimeException()); - } - headerBuf = ByteBuffer.allocate(hdrSize); - readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), hdrSize, false, - offset, pread); + // We were not passed the block size. Need to get it from header. This is costly and + // should happen very rarely. Currently happens on open of a hfile reader where we + // read the trailer blocks for indices. Otherwise, we are reading block sizes out of + // the hfile index. To check, enable TRACE in this file and you'll get an exception + // in a LOG every time we seek. See HBASE-17072 for more detail. + if (LOG.isTraceEnabled()) { + LOG.trace("Extra seek to get block size!", new RuntimeException()); } - onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, - this.fileContext.isUseHBaseChecksum()); + hdr = new byte [hdrSize]; + readAtOffset(is, hdr, 0, hdr.length, false, offset, pread); + onDiskSizeWithHeader = getOnDiskSizeWithHeader(hdr, checksum); } - int preReadHeaderSize = headerBuf == null? 0 : hdrSize; - // Allocate enough space to fit the next block's header too; saves a seek next time through. - // onDiskBlock is whole block + header + checksums then extra hdrSize to read next header; - // onDiskSizeWithHeader is header, body, and any checksums if present. preReadHeaderSize - // says where to start reading. If we have the header cached, then we don't need to read - // it again and we can likely read from last place we left off w/o need to backup and reread - // the header we read last time through here. TODO: Review this overread of the header. Is it necessary - // when we get the block size from the hfile index? See note on PrefetchedHeader class above. - // TODO: Make this ByteBuffer-based. Will make it easier to go to HDFS with BBPool (offheap). - byte [] onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; - int nextBlockOnDiskSize = readAtOffset(is, onDiskBlock, preReadHeaderSize, - onDiskSizeWithHeader - preReadHeaderSize, true, offset + preReadHeaderSize, pread); - if (headerBuf != null) { - // The header has been read when reading the previous block OR in a distinct header-only - // read. Copy to this block's header. - System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize); - } else { - headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize); + // onDiskSizeWithHeader is header, body, and any checksums if present. + // If we read hdr above, then don't reread it (hdr==null check). + // TODO: Make ByteBuffer-based. Will make it easier to go to HDFS with BBPool (offheap). + byte [] onDiskBlock = new byte[onDiskSizeWithHeader]; + int blockOffset = hdr == null? 0: hdr.length; + readAtOffset(is, onDiskBlock, blockOffset, onDiskSizeWithHeader - blockOffset, + false, offset + blockOffset, pread); + if (hdr != null) { + // We read hdr earlier; copy to top of the block. + System.arraycopy(hdr, 0, onDiskBlock, 0, hdrSize); } // Do a few checks before we go instantiate HFileBlock. assert onDiskSizeWithHeader > this.hdrSize; - verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, - this.fileContext.isUseHBaseChecksum()); + verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, onDiskBlock, offset, checksum); ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader); // Verify checksum of the data before using it for building HFileBlock. 
- if (verifyChecksum && - !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) { + if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) { return null; } // The onDiskBlock will become the headerAndDataBuffer for this block. - // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already - // contains the header of next block, so no need to set next block's header in it. - HFileBlock hFileBlock = - new HFileBlock(new SingleByteBuff(onDiskBlockByteBuffer), - this.fileContext.isUseHBaseChecksum(), MemoryType.EXCLUSIVE, offset, - nextBlockOnDiskSize, fileContext); + HFileBlock hFileBlock = new HFileBlock(new SingleByteBuff(onDiskBlockByteBuffer), + checksum, MemoryType.EXCLUSIVE, offset, this.fileContext); // Run check on uncompressed sizings. if (!fileContext.isCompressedOrEncrypted()) { hFileBlock.sanityCheckUncompressed(); @@ -1773,11 +1676,6 @@ public class HFileBlock implements Cacheable { if (LOG.isTraceEnabled()) { LOG.trace("Read " + hFileBlock); } - // Cache next block header if we read it for the next time through here. - if (nextBlockOnDiskSize != -1) { - cacheNextBlockHeader(offset + hFileBlock.getOnDiskSizeWithHeader(), - onDiskBlock, onDiskSizeWithHeader, hdrSize); - } return hFileBlock; } @@ -1880,9 +1778,9 @@ public class HFileBlock implements Cacheable { * @return The passed destination with metadata added. */ private ByteBuffer addMetaData(final ByteBuffer destination) { - destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0); + destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0); // TODO!!! NEED? destination.putLong(this.offset); - destination.putInt(this.nextBlockOnDiskSize); + destination.putInt(getNextBlockOnDiskSize()); return destination; } @@ -1922,9 +1820,6 @@ public class HFileBlock implements Cacheable { if (castedComparison.blockType != this.blockType) { return false; } - if (castedComparison.nextBlockOnDiskSize != this.nextBlockOnDiskSize) { - return false; - } // Offset is important. Needed when we have to remake cachekey when block is returned to cache. if (castedComparison.offset != this.offset) { return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 4cf1bf2..7ccefa5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -255,16 +255,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { LOG.trace("Prefetch start " + getPathOffsetEndStr(path, offset, end)); } // TODO: Could we use block iterator in here? Would that get stuff into the cache? - HFileBlock prevBlock = null; while (offset < end) { if (Thread.interrupted()) { break; } - // Perhaps we got our block from cache? Unlikely as this may be, if it happens, then - // the internal-to-hfileblock thread local which holds the overread that gets the - // next header, will not have happened...so, pass in the onDiskSize gotten from the - // cached block. This 'optimization' triggers extremely rarely I'd say. - long onDiskSize = prevBlock != null? prevBlock.getNextBlockOnDiskSize(): -1; + // TODO + long onDiskSize = -1; HFileBlock block = readBlock(offset, onDiskSize, true, false, false, false, null, null); // Need not update the current block. 
Ideally here the readBlock won't find the @@ -272,7 +268,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // cached in BC. So there is no reference count increment that happens here. // The return will ideally be a noop because the block is not of MemoryType SHARED. returnBlock(block); - prevBlock = block; offset += block.getOnDiskSizeWithHeader(); } } catch (IOException e) { @@ -916,8 +911,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // We are reading the next block without block type validation, because // it might turn out to be a non-data block. - block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(), - block.getNextBlockOnDiskSize(), cacheBlocks, pread, + // TODO: USE BLOCK ITERATOR? + block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(), -1, cacheBlocks, pread, isCompaction, true, null, getEffectiveDataBlockEncoding()); if (block != null && !block.getBlockType().isData()) { // Findbugs: NP_NULL_ON_SOME_PATH // Whatever block we read we will be returning it unless diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java index bd3f4c7..f3ccb62 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java @@ -367,9 +367,8 @@ public class CacheTestUtils { .build(); HFileBlock generated = new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, - prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER, - blockSize, - onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, -1, meta); + prevBlockOffset, cachedBuffer, blockSize, + onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, meta); String strKey; /* No conflicting keys */ diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java index c4950c3..240dba7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java @@ -372,16 +372,11 @@ public class TestChecksum { } @Override - protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size, - boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException { - int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock, - fileOffset, pread); + protected void readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size, + long fileOffset, boolean pread) throws IOException { + super.readAtOffset(istream, dest, destOffset, size, fileOffset, pread); if (!corruptDataStream) { - return returnValue; - } - // Corrupt 3rd character of block magic of next block's header. - if (peekIntoNextBlock) { - dest[destOffset + size + 3] = 0b00000000; + return; } // We might be reading this block's header too, corrupt it. 
dest[destOffset + 1] = 0b00000000; @@ -389,7 +384,6 @@ public class TestChecksum { if (size > hdrSize) { dest[destOffset + hdrSize + 1] = 0b00000000; } - return returnValue; } } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index c75232a..7cc72c5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -838,8 +838,7 @@ public class TestHFileBlock { .withCompression(Algorithm.NONE) .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM) .withChecksumType(ChecksumType.NULL).build(); - HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, -1, 0, -1, meta); + HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, -1, 0, meta); long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase( new MultiByteBuff(buf).getClass(), true) + HConstants.HFILEBLOCK_HEADER_SIZE + size); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java index a4f2338..a9d65ec 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java @@ -49,9 +49,9 @@ public class TestHFileBlockPositionalRead { byte[] buf = new byte[totalLen]; FSDataInputStream in = mock(FSDataInputStream.class); when(in.read(position, buf, bufOffset, totalLen)).thenReturn(totalLen); - boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf, - bufOffset, necessaryLen, extraLen); - assertFalse("Expect false return when no extra bytes requested", ret); + int ret = HFileBlock.positionalReadWithExtra(in, position, buf, + bufOffset, necessaryLen); + assertEquals(necessaryLen, ret); verify(in).read(position, buf, bufOffset, totalLen); verifyNoMoreInteractions(in); } @@ -67,9 +67,9 @@ public class TestHFileBlockPositionalRead { FSDataInputStream in = mock(FSDataInputStream.class); when(in.read(position, buf, bufOffset, totalLen)).thenReturn(5); when(in.read(5, buf, 5, 5)).thenReturn(5); - boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf, - bufOffset, necessaryLen, extraLen); - assertFalse("Expect false return when no extra bytes requested", ret); + int ret = HFileBlock.positionalReadWithExtra(in, position, buf, + bufOffset, necessaryLen); + assertEquals(necessaryLen, ret); verify(in).read(position, buf, bufOffset, totalLen); verify(in).read(5, buf, 5, 5); verifyNoMoreInteractions(in); @@ -85,9 +85,9 @@ public class TestHFileBlockPositionalRead { byte[] buf = new byte[totalLen]; FSDataInputStream in = mock(FSDataInputStream.class); when(in.read(position, buf, bufOffset, totalLen)).thenReturn(totalLen); - boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf, - bufOffset, necessaryLen, extraLen); - assertTrue("Expect true return when reading extra bytes succeeds", ret); + int ret = HFileBlock.positionalReadWithExtra(in, position, buf, + bufOffset, necessaryLen); + assertEquals(necessaryLen, ret); verify(in).read(position, buf, bufOffset, totalLen); verifyNoMoreInteractions(in); } @@ -102,9 +102,9 @@ public class TestHFileBlockPositionalRead { byte[] buf = new byte[totalLen]; 
FSDataInputStream in = mock(FSDataInputStream.class); when(in.read(position, buf, bufOffset, totalLen)).thenReturn(necessaryLen); - boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf, - bufOffset, necessaryLen, extraLen); - assertFalse("Expect false return when reading extra bytes fails", ret); + int ret = HFileBlock.positionalReadWithExtra(in, position, buf, + bufOffset, necessaryLen); + assertEquals(necessaryLen, ret); verify(in).read(position, buf, bufOffset, totalLen); verifyNoMoreInteractions(in); } @@ -121,9 +121,9 @@ public class TestHFileBlockPositionalRead { FSDataInputStream in = mock(FSDataInputStream.class); when(in.read(position, buf, bufOffset, totalLen)).thenReturn(5); when(in.read(5, buf, 5, 10)).thenReturn(10); - boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf, - bufOffset, necessaryLen, extraLen); - assertTrue("Expect true return when reading extra bytes succeeds", ret); + int ret = HFileBlock.positionalReadWithExtra(in, position, buf, + bufOffset, necessaryLen); + assertEquals(necessaryLen, ret); verify(in).read(position, buf, bufOffset, totalLen); verify(in).read(5, buf, 5, 10); verifyNoMoreInteractions(in); @@ -142,7 +142,6 @@ public class TestHFileBlockPositionalRead { when(in.read(position, buf, bufOffset, totalLen)).thenReturn(-1); exception.expect(IOException.class); exception.expectMessage("EOF"); - HFileBlock.positionalReadWithExtra(in, position, buf, bufOffset, - necessaryLen, extraLen); + HFileBlock.positionalReadWithExtra(in, position, buf, bufOffset, necessaryLen); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java index 387514e..f02d577 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java @@ -124,9 +124,8 @@ public class TestHFileDataBlockEncoder { .withBlockSize(0) .withChecksumType(ChecksumType.NULL) .build(); - HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, 0, - 0, -1, hfileContext); + HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, 0, + 0, hfileContext); HFileBlock cacheBlock = createBlockOnDisk(kvs, block, useTags); assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length); } @@ -195,9 +194,8 @@ public class TestHFileDataBlockEncoder { .withBlockSize(0) .withChecksumType(ChecksumType.NULL) .build(); - HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, 0, - 0, -1, meta); + HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf, 0, + 0, meta); return b; } @@ -219,8 +217,7 @@ public class TestHFileDataBlockEncoder { byte[] encodedBytes = baos.toByteArray(); size = encodedBytes.length - block.getDummyHeaderForVersion().length; return new HFileBlock(context.getBlockType(), size, size, -1, ByteBuffer.wrap(encodedBytes), - HFileBlock.FILL_HEADER, 0, block.getOnDiskDataSizeWithHeader(), -1, - block.getHFileContext()); + 0, block.getOnDiskDataSizeWithHeader(), block.getHFileContext()); } private void writeBlock(List kvs, HFileContext fileContext, boolean useTags)