From acb52ae3a4b8e129f5936732c64a8eb30172d09f Mon Sep 17 00:00:00 2001
From: Apekshit
Date: Thu, 5 May 2016 17:05:17 -0700
Subject: [PATCH] HBASE-11625 - Verifies data before building HFileBlock.

- Adds HFileBlock.Header class which contains information about location of
  fields.

Testing: Adds CorruptedFSReaderImpl to TestChecksum.

Change-Id: I107e9636b28abb6b15ec329e885f1e31b1b1b988
Signed-off-by: stack
---
 .../apache/hadoop/hbase/io/hfile/ChecksumUtil.java |  54 +++---
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 180 +++++++++++----------
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |  59 +++++--
 3 files changed, 164 insertions(+), 129 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index c3a73ac..5a12daa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -79,46 +79,35 @@ public class ChecksumUtil {
   }
 
   /**
-   * Validates that the data in the specified HFileBlock matches the
-   * checksum. Generates the checksum for the data and
-   * then validate that it matches the value stored in the header.
-   * If there is a checksum mismatch, then return false. Otherwise
-   * return true.
-   * The header is extracted from the specified HFileBlock while the
-   * data-to-be-verified is extracted from 'data'.
+   * Validates that the data in the specified HFileBlock matches the checksum. Generates the
+   * checksums for the data and then validates that they match those stored at the end of the data.
+   * @param buffer Contains the data in the following order: HFileBlock header, data, checksums.
+   * @param path Path of the HFile to which {@code buffer} belongs. Only used for logging.
+   * @param offset Offset of the data being validated. Only used for logging.
+   * @param hdrSize Size of the block header in {@code buffer}. Only used for logging.
+   * @return True if the checksums match, else false.
    */
-  static boolean validateBlockChecksum(Path path, long offset, HFileBlock block,
-    byte[] data, int hdrSize) throws IOException {
-
-    // If this is an older version of the block that does not have
-    // checksums, then return false indicating that checksum verification
-    // did not succeed. Actually, this methiod should never be called
-    // when the minorVersion is 0, thus this is a defensive check for a
-    // cannot-happen case. Since this is a cannot-happen case, it is
-    // better to return false to indicate a checksum validation failure.
-    if (!block.getHFileContext().isUseHBaseChecksum()) {
-      return false;
-    }
-
-    // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that
-    // the caller is not interested in validating checksums, so we
-    // always return true.
-    ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
+  static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize)
+      throws IOException {
+    // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
+    // so we always return true.
+    ChecksumType cktype =
+        ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
     if (cktype == ChecksumType.NULL) {
       return true; // No checksum validations needed for this block.
     }
 
     // read in the stored value of the checksum size from the header.
-    int bytesPerChecksum = block.getBytesPerChecksum();
+    int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
         cktype.getDataChecksumType(), bytesPerChecksum);
     assert dataChecksum != null;
-    int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
+    int onDiskDataSizeWithHeader =
+        buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     if (LOG.isTraceEnabled()) {
-      LOG.info("dataLength=" + data.length
-          + ", sizeWithHeader=" + sizeWithHeader
+      LOG.info("dataLength=" + buffer.capacity()
+          + ", sizeWithHeader=" + onDiskDataSizeWithHeader
           + ", checksumType=" + cktype.getName()
           + ", file=" + path.toString()
           + ", offset=" + offset
@@ -126,9 +115,10 @@ public class ChecksumUtil {
           + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
-      dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
-          ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
-          path.toString(), 0);
+      ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
+      ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
+          .limit(buffer.capacity());
+      dataChecksum.verifyChunkedSums(data, checksums, path.toString(), 0);
     } catch (ChecksumException e) {
       return false;
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index e2f3b44..c11fb2d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -158,6 +158,26 @@ public class HFileBlock implements Cacheable {
       .registerDeserializer(blockDeserializer);
   }
 
+  // Todo: encapsulate Header related logic in this inner class.
+  static class Header {
+    // Format of header is:
+    // 8 bytes - block magic
+    // 4 bytes int - onDiskSizeWithoutHeader
+    // 4 bytes int - uncompressedSizeWithoutHeader
+    // 8 bytes long - prevBlockOffset
+    // The following 3 are only present if header contains checksum information
+    // 1 byte - checksum type
+    // 4 byte int - bytes per checksum
+    // 4 byte int - onDiskDataSizeWithHeader
+    static int BLOCK_MAGIC_INDEX = 0;
+    static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+    static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+    static int PREV_BLOCK_OFFSET_INDEX = 16;
+    static int CHECKSUM_TYPE_INDEX = 24;
+    static int BYTES_PER_CHECKSUM_INDEX = 25;
+    static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+  }
+
   /** Type of block. Header field 0. */
   private BlockType blockType;
@@ -253,15 +273,15 @@ public class HFileBlock implements Cacheable {
   HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
     b.rewind();
     blockType = BlockType.read(b);
-    onDiskSizeWithoutHeader = b.getInt();
-    uncompressedSizeWithoutHeader = b.getInt();
-    prevBlockOffset = b.getLong();
+    onDiskSizeWithoutHeader = b.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    uncompressedSizeWithoutHeader = b.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+    prevBlockOffset = b.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
     HFileContextBuilder contextBuilder = new HFileContextBuilder();
     contextBuilder.withHBaseCheckSum(usesHBaseChecksum);
     if (usesHBaseChecksum) {
-      contextBuilder.withChecksumType(ChecksumType.codeToType(b.get()));
-      contextBuilder.withBytesPerCheckSum(b.getInt());
-      this.onDiskDataSizeWithHeader = b.getInt();
+      contextBuilder.withChecksumType(ChecksumType.codeToType(b.get(Header.CHECKSUM_TYPE_INDEX)));
+      contextBuilder.withBytesPerCheckSum(b.getInt(Header.BYTES_PER_CHECKSUM_INDEX));
+      this.onDiskDataSizeWithHeader = b.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     } else {
       contextBuilder.withChecksumType(ChecksumType.NULL);
       contextBuilder.withBytesPerCheckSum(0);
@@ -486,23 +506,21 @@ public class HFileBlock implements Cacheable {
   /**
    * Called after reading a block with provided onDiskSizeWithHeader.
    */
-  private void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader)
-      throws IOException {
-    if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
+  private static void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader,
+      int actualOnDiskSizeWithoutHeader, ByteBuffer buf, long offset) throws IOException {
+    if (actualOnDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) {
+      // We make the read-only copy here instead of when passing the parameter to this function
+      // to make duplicates only in failure cases, instead of every single time.
+      ByteBuffer bufReadOnly = buf.asReadOnlyBuffer();
       String dataBegin = null;
-      if (buf.hasArray()) {
-        dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset(), Math.min(32, buf.limit()));
-      } else {
-        ByteBuffer bufDup = getBufferReadOnly();
-        byte[] dataBeginBytes = new byte[Math.min(32, bufDup.limit() - bufDup.position())];
-        bufDup.get(dataBeginBytes);
-        dataBegin = Bytes.toStringBinary(dataBeginBytes);
-      }
+      byte[] dataBeginBytes = new byte[Math.min(32, bufReadOnly.limit() - bufReadOnly.position())];
+      bufReadOnly.get(dataBeginBytes);
+      dataBegin = Bytes.toStringBinary(dataBeginBytes);
       String blockInfoMsg = "Block offset: " + offset + ", data starts with: " + dataBegin;
       throw new IOException("On-disk size without header provided is "
           + expectedOnDiskSizeWithoutHeader + ", but block "
-          + "header contains " + onDiskSizeWithoutHeader + ". " +
+          + "header contains " + actualOnDiskSizeWithoutHeader + ". " +
           blockInfoMsg);
     }
   }
@@ -597,13 +615,18 @@ public class HFileBlock implements Cacheable {
   }
 
   /** An additional sanity-check in case no compression or encryption is being used. */
-  public void assumeUncompressed() throws IOException {
-    if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader +
-        totalChecksumBytes()) {
+  public static void verifyUncompressed(ByteBuffer buf, HFileContext fileContext)
+      throws IOException {
+    int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    int uncompressedSizeWithoutHeader = buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+    int onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
+    int checksumBytes = totalChecksumBytes(fileContext, onDiskDataSizeWithHeader);
+
+    if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + checksumBytes) {
       throw new IOException("Using no compression but "
           + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
           + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
-          + ", numChecksumbytes=" + totalChecksumBytes());
+          + ", numChecksumbytes=" + checksumBytes);
     }
   }
@@ -1380,10 +1403,8 @@
      * -1 if it could not be determined
      * @throws IOException
      */
-    protected int readAtOffset(FSDataInputStream istream,
-        byte[] dest, int destOffset, int size,
-        boolean peekIntoNextBlock, long fileOffset, boolean pread)
-        throws IOException {
+    protected int readAtOffset(FSDataInputStream istream, byte[] dest, int destOffset, int size,
+        boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
       if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) {
         // We are asked to read the next block's header as well, but there is
@@ -1503,10 +1524,8 @@
       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
 
-      HFileBlock blk = readBlockDataInternal(is, offset,
-          onDiskSizeWithHeaderL,
-          uncompressedSize, pread,
-          doVerificationThruHBaseChecksum);
+      HFileBlock blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL,
+          uncompressedSize, pread, doVerificationThruHBaseChecksum);
       if (blk == null) {
         HFile.LOG.warn("HBase checksum verification failed for file " +
                        path + " at offset " +
@@ -1532,9 +1551,8 @@
         // a few more than precisely this number.
         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
         doVerificationThruHBaseChecksum = false;
-        blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
-            uncompressedSize, pread,
-            doVerificationThruHBaseChecksum);
+        blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL, uncompressedSize,
+            pread, doVerificationThruHBaseChecksum);
         if (blk != null) {
           HFile.LOG.warn("HDFS checksum verification suceeded for file " +
                          path + " at offset " +
@@ -1564,7 +1582,7 @@
      * Reads a version 2 block.
      *
      * @param offset the offset in the stream to read at
-     * @param onDiskSizeWithHeaderL the on-disk size of the block, including
+     * @param onDiskSizeWithHeader the on-disk size of the block, including
      *          the header, or -1 if unknown
      * @param uncompressedSize the uncompressed size of the the block. Always
      *          expected to be -1. This parameter is only used in version 1.
@@ -1573,13 +1591,13 @@
      *          If HBase checksum is switched off, then use HDFS checksum.
     * @return the HFileBlock or null if there is a HBase checksum mismatch
     */
-    private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
-        long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread,
+    protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+        int onDiskSizeWithHeader, int uncompressedSize, boolean pread,
         boolean verifyChecksum) throws IOException {
       if (offset < 0) {
         throw new IOException("Invalid offset=" + offset + " trying to read "
-            + "block (onDiskSize=" + onDiskSizeWithHeaderL
+            + "block (onDiskSize=" + onDiskSizeWithHeader
             + ", uncompressedSize=" + uncompressedSize + ")");
       }
@@ -1588,15 +1606,14 @@
             "the uncompressed size parameter");
       }
 
-      if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1)
-          || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) {
-        throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL
+      if ((onDiskSizeWithHeader < hdrSize && onDiskSizeWithHeader != -1)
+          || onDiskSizeWithHeader >= Integer.MAX_VALUE) {
+        throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeader
            + ": expected to be at least " + hdrSize
            + " and at most " + Integer.MAX_VALUE + ", or -1 (offset="
            + offset + ", uncompressedSize=" + uncompressedSize + ")");
       }
 
-      int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL;
       // See if we can avoid reading the header. This is desirable, because
       // we will not incur a backward seek operation if we have already
       // read this block's header as part of the previous read's look-ahead.
@@ -1611,7 +1628,6 @@
       int nextBlockOnDiskSize = 0;
       byte[] onDiskBlock = null;
 
-      HFileBlock b = null;
       if (onDiskSizeWithHeader > 0) {
         // We know the total on-disk size. Read the entire block into memory,
         // then parse the header. This code path is used when
@@ -1637,28 +1653,12 @@
         } else {
           headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
         }
-        // We know the total on-disk size but not the uncompressed size. Parse the header.
-        try {
-          // TODO: FIX!!! Expensive parse just to get a length
-          b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
-        } catch (IOException ex) {
-          // Seen in load testing. Provide comprehensive debug info.
-          throw new IOException("Failed to read compressed block at "
-              + offset
-              + ", onDiskSizeWithoutHeader="
-              + onDiskSizeWithHeader
-              + ", preReadHeaderSize="
-              + hdrSize
-              + ", header.length="
-              + prefetchedHeader.header.length
-              + ", header bytes: "
-              + Bytes.toStringBinary(prefetchedHeader.header, 0,
-                  hdrSize), ex);
-        }
         // if the caller specifies a onDiskSizeWithHeader, validate it.
-        int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
-        assert onDiskSizeWithoutHeader >= 0;
-        b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader);
+        int expectedOnDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize;
+        int actualOnDiskSizeWithoutHeader =
+            headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+        validateOnDiskSizeWithoutHeader(expectedOnDiskSizeWithoutHeader,
+            actualOnDiskSizeWithoutHeader, headerBuf, offset);
       } else {
         // Check headerBuf to see if we have read this block's header as part of
         // reading the previous block. This is an optimization of peeking into
@@ -1679,22 +1679,21 @@
           readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), hdrSize,
               false, offset, pread);
         }
-        // TODO: FIX!!! Expensive parse just to get a length
-        b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum());
-        onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
-        // headerBuf is HBB
+        int onDiskSizeWithoutHeader = headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+        onDiskSizeWithHeader = onDiskSizeWithoutHeader + hdrSize;
+        onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize];
         System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
         nextBlockOnDiskSize =
-            readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader()
-                - hdrSize, true, offset + hdrSize, pread);
-        onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
+            readAtOffset(is, onDiskBlock, hdrSize, onDiskSizeWithHeader - hdrSize, true,
+                offset + hdrSize, pread);
       }
 
+      ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
       if (!fileContext.isCompressedOrEncrypted()) {
-        b.assumeUncompressed();
+        verifyUncompressed(headerBuf, fileContext);
       }
 
-      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) {
         return null; // checksum mismatch
       }
@@ -1702,8 +1701,7 @@
       // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
       // contains the header of next block, so no need to set next
       // block's header in it.
-      b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader),
-          this.fileContext.isUseHBaseChecksum());
+      HFileBlock b = new HFileBlock(onDiskBlockByteBuffer, this.fileContext.isUseHBaseChecksum());
 
       b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
@@ -1738,15 +1736,21 @@
     }
 
     /**
-     * Generates the checksum for the header as well as the data and
-     * then validates that it matches the value stored in the header.
-     * If there is a checksum mismatch, then return false. Otherwise
-     * return true.
+     * Generates the checksum for the header as well as the data and then validates it.
+     * If the block does not use checksums, returns false.
+     * @return True if the checksum matches, else false.
      */
-    protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
-        int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(path, offset, block, data, hdrSize);
+    protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize)
+        throws IOException {
+      // If this is an older version of the block that does not have checksums, then return false
+      // indicating that checksum verification did not succeed. Actually, this method should never
+      // be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
+      // case. Since this is a cannot-happen case, it is better to return false to indicate a
+      // checksum validation failure.
+      if (!fileContext.isUseHBaseChecksum()) {
+        return false;
+      }
+      return ChecksumUtil.validateChecksum(data, path, offset, hdrSize);
     }
 
     @Override
@@ -1849,19 +1853,23 @@
   }
 
   /**
-   * Calcuate the number of bytes required to store all the checksums
-   * for this block. Each checksum value is a 4 byte integer.
+   * Calculate the number of bytes required to store all the checksums for this block. Each
+   * checksum value is a 4 byte integer ({@link HFileBlock#CHECKSUM_SIZE}).
   */
  int totalChecksumBytes() {
+    return HFileBlock.totalChecksumBytes(this.fileContext, onDiskDataSizeWithHeader);
+  }
+
+  private static int totalChecksumBytes(HFileContext fileContext, int onDiskDataSizeWithHeader) {
    // If the hfile block has minorVersion 0, then there are no checksum
    // data to validate. Similarly, a zero value in this.bytesPerChecksum
    // indicates that cached blocks do not have checksum data because
    // checksums were already validated when the block was read from disk.
-    if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
+    if (!fileContext.isUseHBaseChecksum() || fileContext.getBytesPerChecksum() == 0) {
      return 0;
    }
    return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader,
-        this.fileContext.getBytesPerChecksum());
+        fileContext.getBytesPerChecksum());
  }
 
  /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 4f29ff5..93477df 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -186,7 +186,7 @@ public class TestChecksum {
         .withIncludesTags(useTags)
         .withHBaseCheckSum(true)
         .build();
-      HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta);
+      HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
       HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
       b.sanityCheck();
       assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@@ -228,7 +228,7 @@ public class TestChecksum {
       HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
       assertEquals(false, newfs.useHBaseChecksum());
       is = new FSDataInputStreamWrapper(newfs, path);
-      hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta);
+      hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
       b = hbr.readBlockData(0, -1, -1, pread);
       is.close();
       b.sanityCheck();
@@ -249,7 +249,7 @@ public class TestChecksum {
     }
   }
 
-  /** 
+  /**
    * Test different values of bytesPerChecksum
    */
   @Test
@@ -337,20 +337,57 @@ public class TestChecksum {
   }
 
   /**
-   * A class that introduces hbase-checksum failures while
-   * reading data from hfiles. This should trigger the hdfs level
-   * checksum validations.
+   * This class is to test checksum behavior when data is corrupted. It mimics the following
+   * behavior:
+   *  - When fs checksum is disabled, hbase may get corrupted data from hdfs. If verifyChecksum
+   *    is true, it means hbase checksum is on and fs checksum is off, so we corrupt the data.
+   *  - When fs checksum is enabled, hdfs will get a different copy from another node, and will
+   *    always return correct data. So we don't corrupt the data when verifyChecksum for hbase is
+   *    off.
    */
-  static private class FSReaderImplTest extends HFileBlock.FSReaderImpl {
-    public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
+  static private class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl {
+    /**
+     * If set to true, corrupt reads using readAtOffset(...).
+     */
+    boolean corruptDataStream = false;
+
+    public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
         Path path, HFileContext meta) throws IOException {
       super(istream, fileSize, (HFileSystem) fs, path, meta);
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block, long offset,
-        byte[] data, int hdrSize) throws IOException {
-      return false; // checksum validation failure
+    protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+        int onDiskSizeWithHeaderL, int uncompressedSize, boolean pread, boolean verifyChecksum)
+        throws IOException {
+      if (verifyChecksum) {
+        corruptDataStream = true;
+      }
+      HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL,
+          uncompressedSize, pread, verifyChecksum);
+      corruptDataStream = false;
+      return b;
+    }
+
+    @Override
+    protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
+        boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
+      int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
+          fileOffset, pread);
+      if (!corruptDataStream) {
+        return returnValue;
+      }
+      // Corrupt 3rd character of block magic of next block's header.
+      if (peekIntoNextBlock) {
+        dest[destOffset + size + 3] = 0b00000000;
+      }
+      // We might be reading this block's header too, corrupt it.
+      dest[destOffset + 1] = 0b00000000;
+      // Corrupt non header data
+      if (size > hdrSize) {
+        dest[destOffset + hdrSize + 1] = 0b00000000;
+      }
+      return returnValue;
     }
   }
 }
-- 
2.3.2 (Apple Git-55)
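For illustration only (not part of the patch): a minimal, self-contained sketch of how the fixed
offsets defined in HFileBlock.Header can be used to split a block buffer into its checksummed
region and its trailing checksums, in the same spirit as the new ChecksumUtil.validateChecksum().
It uses only the JDK; java.util.zip.CRC32 stands in for Hadoop's DataChecksum, and the class and
method names below are illustrative assumptions rather than code from the patch.

import java.nio.ByteBuffer;
import java.util.zip.CRC32;

public class BlockChecksumSketch {
  // Offsets mirroring HFileBlock.Header above.
  static final int BYTES_PER_CHECKSUM_INDEX = 25;
  static final int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;

  /**
   * Verifies a buffer laid out as: [33-byte header][data][one 4-byte checksum per chunk],
   * where each chunk of bytesPerChecksum bytes of (header + data) has its own checksum.
   */
  static boolean verify(ByteBuffer buffer) {
    int bytesPerChecksum = buffer.getInt(BYTES_PER_CHECKSUM_INDEX);
    int onDiskDataSizeWithHeader = buffer.getInt(ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
    if (bytesPerChecksum <= 0) {
      return true; // no per-chunk checksums to validate
    }

    // Same duplicate()/position()/limit() slicing as validateChecksum() in the patch.
    ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
    ByteBuffer sums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
        .limit(buffer.capacity());

    CRC32 crc = new CRC32();
    byte[] chunk = new byte[bytesPerChecksum];
    while (data.hasRemaining()) {
      int len = Math.min(bytesPerChecksum, data.remaining());
      data.get(chunk, 0, len);
      crc.reset();
      crc.update(chunk, 0, len);
      if ((int) crc.getValue() != sums.getInt()) {
        return false; // checksum mismatch; caller can fall back to HDFS checksums
      }
    }
    return true;
  }
}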