From ac95ffb01876f3c299f3d9fe299e571d2dec6807 Mon Sep 17 00:00:00 2001
From: Apekshit
Date: Thu, 5 May 2016 17:05:17 -0700
Subject: [PATCH] HBASE-11625 - Verifies data before building HFileBlock.
 - Adds HFileBlock.Header class which contains information about the location
   of header fields.

Testing: Adds CorruptedFSReaderImpl to TestChecksum.

Change-Id: I107e9636b28abb6b15ec329e885f1e31b1b1b988
---
 .../apache/hadoop/hbase/io/hfile/ChecksumUtil.java | 53 ++++++--------
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 80 ++++++++++++++--------
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java | 64 +++++++++++++----
 3 files changed, 125 insertions(+), 72 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index b0b1714..a47cc12 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -78,46 +78,35 @@ public class ChecksumUtil {
   }

   /**
-   * Validates that the data in the specified HFileBlock matches the
-   * checksum. Generates the checksum for the data and
-   * then validate that it matches the value stored in the header.
-   * If there is a checksum mismatch, then return false. Otherwise
-   * return true.
-   * The header is extracted from the specified HFileBlock while the
-   * data-to-be-verified is extracted from 'data'.
+   * Validates that the data in the specified HFileBlock matches the checksum. Generates the
+   * checksums for the data and then validates that they match those stored at the end of the data.
+   * @param buffer Contains the data in the following order: HFileBlock header, data, checksums.
+   * @param pathName Path of the HFile to which {@code buffer} belongs. Only used for logging.
+   * @param offset Offset of the data being validated. Only used for logging.
+   * @param hdrSize Size of the block header in {@code buffer}. Only used for logging.
+   * @return True if the checksums match, else false.
    */
-  static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
-    byte[] data, int hdrSize) throws IOException {
-
-    // If this is an older version of the block that does not have
-    // checksums, then return false indicating that checksum verification
-    // did not succeed. Actually, this method should never be called
-    // when the minorVersion is 0, thus this is a defensive check for a
-    // cannot-happen case. Since this is a cannot-happen case, it is
-    // better to return false to indicate a checksum validation failure.
-    if (!block.getHFileContext().isUseHBaseChecksum()) {
-      return false;
-    }
-
-    // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that
-    // the caller is not interested in validating checksums, so we
-    // always return true.
-    ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
+  static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
+      throws IOException {
+    // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
+    // so we always return true.
+    ChecksumType cktype =
+        ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
     if (cktype == ChecksumType.NULL) {
       return true; // No checksum validations needed for this block.
     }

     // read in the stored value of the checksum size from the header.
-    int bytesPerChecksum = block.getBytesPerChecksum();
+    int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
         cktype.getDataChecksumType(), bytesPerChecksum);
     assert dataChecksum != null;
-    int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
+    int onDiskDataSizeWithHeader =
+        buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     if (LOG.isTraceEnabled()) {
-      LOG.info("dataLength=" + data.length
-          + ", sizeWithHeader=" + sizeWithHeader
+      LOG.trace("dataLength=" + buffer.capacity()
+          + ", sizeWithHeader=" + onDiskDataSizeWithHeader
           + ", checksumType=" + cktype.getName()
           + ", file=" + pathName
           + ", offset=" + offset
@@ -125,8 +114,10 @@ public class ChecksumUtil {
           + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
-      dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
-          ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), pathName, 0);
+      ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
+      ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
+          .limit(buffer.capacity());
+      dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
     } catch (ChecksumException e) {
       return false;
     }

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index f3402da..f465be0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -276,6 +276,26 @@ public class HFileBlock implements Cacheable {
     CacheableDeserializerIdManager.registerDeserializer(BLOCK_DESERIALIZER);
   }

+  // TODO: encapsulate Header-related logic in this inner class.
+  static class Header {
+    // Format of header is:
+    // 8 bytes - block magic
+    // 4 bytes int - onDiskSizeWithoutHeader
+    // 4 bytes int - uncompressedSizeWithoutHeader
+    // 8 bytes long - prevBlockOffset
+    // The following 3 are only present if the header contains checksum information
+    // 1 byte - checksum type
+    // 4 byte int - bytes per checksum
+    // 4 byte int - onDiskDataSizeWithHeader
+    static final int BLOCK_MAGIC_INDEX = 0;
+    static final int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
+    static final int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
+    static final int PREV_BLOCK_OFFSET_INDEX = 16;
+    static final int CHECKSUM_TYPE_INDEX = 24;
+    static final int BYTES_PER_CHECKSUM_INDEX = 25;
+    static final int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
+  }
+
   /**
    * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
    */
@@ -305,7 +325,7 @@ public class HFileBlock implements Cacheable {
    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
    * @param prevBlockOffset see {@link #prevBlockOffset}
-   * @param buf block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
+   * @param b block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes) followed by
    *          uncompressed data.
    * @param fillHeader when true, write the first 4 header fields into passed buffer.
   * @param offset the file offset the block was read from
@@ -335,12 +355,13 @@
     final int nextBlockOnDiskSize, HFileContext fileContext) throws IOException {
     buf.rewind();
     final BlockType blockType = BlockType.read(buf);
-    final int onDiskSizeWithoutHeader = buf.getInt();
-    final int uncompressedSizeWithoutHeader = buf.getInt();
-    final long prevBlockOffset = buf.getLong();
-    byte checksumType = buf.get();
-    int bytesPerChecksum = buf.getInt();
-    int onDiskDataSizeWithHeader = buf.getInt();
+    final int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
+    final int uncompressedSizeWithoutHeader =
+        buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
+    final long prevBlockOffset = buf.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
+    byte checksumType = buf.get(Header.CHECKSUM_TYPE_INDEX);
+    int bytesPerChecksum = buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX);
+    int onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     // This constructor is called when we deserialize a block from cache and when we read a block in
     // from the fs. fileCache is null when deserialized from cache so need to make up one.
     HFileContextBuilder fileContextBuilder = fileContext != null?
@@ -384,14 +405,13 @@
   }

   /**
-   * Parse total ondisk size including header and checksum. Its second field in header after
-   * the magic bytes.
+   * Parse total ondisk size including header and checksum.
    * @param headerBuf Header ByteBuffer. Presumed exact size of header.
    * @return Size of the block with header included.
    */
   private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf) {
     // Set hbase checksum to true always calling headerSize.
-    return headerBuf.getInt(BlockType.MAGIC_LENGTH) + headerSize(true);
+    return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(true);
   }

   /**
@@ -1459,8 +1479,7 @@
    * @throws IOException
    */
   protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
-      boolean peekIntoNextBlock, long fileOffset, boolean pread)
-      throws IOException {
+      boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
     if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) {
       // We are asked to read the next block's header as well, but there is
       // not enough room in the array.
@@ -1647,9 +1666,8 @@
    * If HBase checksum is switched off, then use HDFS checksum.
    * @return the HFileBlock or null if there is a HBase checksum mismatch
    */
-  private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
-      long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum)
-      throws IOException {
+  protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+      long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
     if (offset < 0) {
       throw new IOException("Invalid offset=" + offset + " trying to read "
           + "block (onDiskSize=" + onDiskSizeWithHeaderL + ")");
@@ -1691,20 +1709,23 @@
     // Do a few checks before we go instantiate HFileBlock.
     assert onDiskSizeWithHeader > this.hdrSize;
     verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset);
+    ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
+    // Verify checksum of the data before using it for building HFileBlock.
+    if (verifyChecksum &&
+        !validateChecksum(offset, onDiskBlockByteBuffer.asReadOnlyBuffer(), hdrSize)) {
+      return null;
+    }
     // The onDiskBlock will become the headerAndDataBuffer for this block.
     // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
     // contains the header of next block, so no need to set next block's header in it.
     HFileBlock hFileBlock =
-        new HFileBlock(new SingleByteBuff(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader)),
+        new HFileBlock(new SingleByteBuff(onDiskBlockByteBuffer),
            this.fileContext.isUseHBaseChecksum(), MemoryType.EXCLUSIVE,
            offset, nextBlockOnDiskSize, fileContext);
     // Run check on uncompressed sizings.
     if (!fileContext.isCompressedOrEncrypted()) {
       hFileBlock.sanityCheckUncompressed();
     }
-    if (verifyChecksum && !validateBlockChecksum(hFileBlock, offset, onDiskBlock, hdrSize)) {
-      return null;
-    }
     if (LOG.isTraceEnabled()) {
       LOG.trace("Read " + hFileBlock);
     }
@@ -1737,15 +1758,20 @@
   }

   /**
-   * Generates the checksum for the header as well as the data and
-   * then validates that it matches the value stored in the header.
-   * If there is a checksum mismatch, then return false. Otherwise
-   * return true.
+   * Generates the checksum for the header as well as the data and then validates it.
+   * If the block doesn't use checksums, returns false.
+   * @return True if the checksums match, else false.
    */
-  protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
-      int hdrSize)
-      throws IOException {
-    return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
+  protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize) throws IOException {
+    // If this is an older version of the block that does not have checksums, then return false
+    // indicating that checksum verification did not succeed. Actually, this method should never
+    // be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
+    // case. Since this is a cannot-happen case, it is better to return false to indicate a
+    // checksum validation failure.
+    if (!fileContext.isUseHBaseChecksum()) {
+      return false;
+    }
+    return ChecksumUtil.validateChecksum(data, pathName, offset, hdrSize);
   }

   @Override

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index d91a811..c4950c3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -108,9 +108,9 @@ public class TestChecksum {
       ChecksumType cktype = itr.next();
       Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName());
       FSDataOutputStream os = fs.create(path);
-      HFileContext meta = new HFileContextBuilder().
-          withChecksumType(cktype).
-          build();
+      HFileContext meta = new HFileContextBuilder()
+          .withChecksumType(cktype)
+          .build();
       HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
       DataOutputStream dos = hbw.startWriting(BlockType.DATA);
       for (int i = 0; i < 1000; ++i) {
@@ -189,7 +189,7 @@ public class TestChecksum {
           .withIncludesTags(useTags)
           .withHBaseCheckSum(true)
           .build();
-      HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta);
+      HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
       HFileBlock b = hbr.readBlockData(0, -1, pread);
       b.sanityCheck();
       assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@@ -231,7 +231,7 @@ public class TestChecksum {
       HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
       assertEquals(false, newfs.useHBaseChecksum());
       is = new FSDataInputStreamWrapper(newfs, path);
-      hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta);
+      hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
       b = hbr.readBlockData(0, -1, pread);
       is.close();
       b.sanityCheck();
@@ -252,7 +252,7 @@ public class TestChecksum {
     }
   }

-  /** 
+  /**
    * Test different values of bytesPerChecksum
    */
   @Test
@@ -340,20 +340,56 @@ public class TestChecksum {
   }

   /**
-   * A class that introduces hbase-checksum failures while
-   * reading data from hfiles. This should trigger the hdfs level
-   * checksum validations.
+   * This class tests checksum behavior when data is corrupted. It mimics the following
+   * behavior:
+   * - When fs checksums are disabled, hbase may get corrupted data from hdfs. If verifyChecksum
+   * is true, hbase checksums are on and fs checksums are off, so we corrupt the data.
+   * - When fs checksums are enabled, hdfs will get a different copy from another node and will
+   * always return correct data. So we don't corrupt the data when verifyChecksum for hbase is
+   * off.
    */
-  static private class FSReaderImplTest extends HFileBlock.FSReaderImpl {
-    public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
+  private static class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl {
+    /**
+     * If set to true, corrupt reads using readAtOffset(...).
+     */
+    boolean corruptDataStream = false;
+
+    public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
         Path path, HFileContext meta) throws IOException {
       super(istream, fileSize, (HFileSystem) fs, path, meta);
     }

     @Override
-    protected boolean validateBlockChecksum(HFileBlock block, long offset,
-        byte[] data, int hdrSize) throws IOException {
-      return false; // checksum validation failure
+    protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
+        long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum) throws IOException {
+      if (verifyChecksum) {
+        corruptDataStream = true;
+      }
+      HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
+          verifyChecksum);
+      corruptDataStream = false;
+      return b;
+    }
+
+    @Override
+    protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
+        boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
+      int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
+          fileOffset, pread);
+      if (!corruptDataStream) {
+        return returnValue;
+      }
+      // Corrupt the byte at index 3 of the next block's magic.
+      if (peekIntoNextBlock) {
+        dest[destOffset + size + 3] = 0b00000000;
+      }
+      // We might be reading this block's header too, corrupt it.
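+      // (destOffset + 1 is inside the 8-byte block magic at the start of this block's header.)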
+      dest[destOffset + 1] = 0b00000000;
+      // Corrupt non-header data.
+      if (size > hdrSize) {
+        dest[destOffset + hdrSize + 1] = 0b00000000;
+      }
+      return returnValue;
     }
   }
 }
\ No newline at end of file
-- 
2.3.2 (Apple Git-55)
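
Aside (not part of the patch): the duplicate()/position()/limit() slicing that the new
ChecksumUtil.validateChecksum uses to carve one buffer into a data view and a checksum view
can be exercised standalone against Hadoop's org.apache.hadoop.util.DataChecksum, the same
class the patch calls into. The sketch below is illustrative only: the class name
ChecksumSliceDemo, the 1300-byte payload, and the CRC32C/512-byte-chunk parameters are
made-up values, not anything taken from HBase.

    import java.io.IOException;
    import java.nio.ByteBuffer;

    import org.apache.hadoop.util.DataChecksum;

    public class ChecksumSliceDemo {
      public static void main(String[] args) throws IOException {
        // Illustrative sizes; HBase instead reads bytesPerChecksum out of the block header.
        int bytesPerChecksum = 512;
        byte[] payload = new byte[1300]; // stands in for one block's header + data
        for (int i = 0; i < payload.length; i++) {
          payload[i] = (byte) i;
        }

        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, bytesPerChecksum);
        int chunks = (payload.length + bytesPerChecksum - 1) / bytesPerChecksum;
        ByteBuffer buffer = ByteBuffer.allocate(payload.length + chunks * sum.getChecksumSize());
        buffer.put(payload);

        // Same slicing pattern as validateChecksum: two independent views over one backing
        // buffer. The casts are needed on Java 7/8, where position()/limit() return Buffer.
        ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(payload.length);
        ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(payload.length)
            .limit(buffer.capacity());
        sum.calculateChunkedSums(data, checksums);

        // verifyChunkedSums throws ChecksumException on a mismatch; that is the exception the
        // patched validateChecksum catches before returning false.
        sum.verifyChunkedSums(data, checksums, "demo", 0);
        System.out.println("verified " + chunks + " chunks");
      }
    }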