From 3bf01aa4a8ad5d6a89928fa819b695b305a13756 Mon Sep 17 00:00:00 2001
From: "Apekshit(Appy) Sharma"
Date: Sun, 10 May 2015 23:01:16 -0700
Subject: [PATCH] HBASE-11927 Use Native Hadoop Library for HFile checksum. (Apekshit)

---
 .../org/apache/hadoop/hbase/util/ChecksumType.java |  81 ----------------
 .../apache/hadoop/hbase/io/hfile/ChecksumUtil.java | 107 +++++++-------------
 2 files changed, 33 insertions(+), 155 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
index 95df769..abc5934 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
@@ -40,112 +40,31 @@ public enum ChecksumType {
     public String getName() {
       return "NULL";
     }
-    @Override
-    public void initialize() {
-      // do nothing
-    }
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      return null; // checksums not used
-    }
   },
 
   CRC32((byte)1) {
-    private transient Constructor<?> ctor;
-
     @Override
     public String getName() {
       return "CRC32";
     }
-
-    @Override
-    public void initialize() {
-      final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32";
-      final String JDKCRC = "java.util.zip.CRC32";
-      LOG = LogFactory.getLog(ChecksumType.class);
-
-      // check if hadoop library is available
-      try {
-        ctor = ChecksumFactory.newConstructor(PURECRC32);
-        LOG.debug(PURECRC32 + " available");
-      } catch (Exception e) {
-        LOG.trace(PURECRC32 + " not available.");
-      }
-      try {
-        // The default checksum class name is java.util.zip.CRC32.
-        // This is available on all JVMs.
-        if (ctor == null) {
-          ctor = ChecksumFactory.newConstructor(JDKCRC);
-          LOG.debug(JDKCRC + " available");
-        }
-      } catch (Exception e) {
-        LOG.trace(JDKCRC + " not available.");
-      }
-    }
-
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      if (ctor == null) {
-        throw new IOException("Bad constructor for " + getName());
-      }
-      try {
-        return (Checksum)ctor.newInstance();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
   },
 
   CRC32C((byte)2) {
-    private transient Constructor<?> ctor;
-
     @Override
     public String getName() {
      return "CRC32C";
     }
-
-    @Override
-    public void initialize() {
-      final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C";
-      LOG = LogFactory.getLog(ChecksumType.class);
-      try {
-        ctor = ChecksumFactory.newConstructor(PURECRC32C);
-        LOG.debug(PURECRC32C + " available");
-      } catch (Exception e) {
-        LOG.trace(PURECRC32C + " not available.");
-      }
-    }
-
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      if (ctor == null) {
-        throw new IOException("Bad constructor for " + getName());
-      }
-      try {
-        return (Checksum)ctor.newInstance();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
   };
 
   private final byte code;
-  protected Log LOG;
-
-  /** initializes the relevant checksum class object */
-  abstract void initialize();
 
   /** returns the name of this checksum type */
   public abstract String getName();
 
   private ChecksumType(final byte c) {
     this.code = c;
-    initialize();
   }
 
-  /** returns a object that can be used to generate/validate checksums */
-  public abstract Checksum getChecksumObject() throws IOException;
-
   public byte getCode() {
     return this.code;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 0e03a42..9953075 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -22,17 +22,22 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.zip.Checksum;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ChecksumException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ChecksumType;
+import org.apache.hadoop.util.DataChecksum;
 
 /**
  * Utility methods to compute and validate checksums.
  */
 @InterfaceAudience.Private
 public class ChecksumUtil {
+  public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
 
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
@@ -60,33 +65,20 @@ public class ChecksumUtil {
    * @param checksumType type of checksum
    * @param bytesPerChecksum number of bytes per checksum value
    */
-  static void generateChecksums(byte[] indata,
-    int startOffset, int endOffset,
-    byte[] outdata, int outOffset,
-    ChecksumType checksumType,
+  static void generateChecksums(byte[] indata, int startOffset, int endOffset,
+    byte[] outdata, int outOffset, ChecksumType checksumType,
     int bytesPerChecksum) throws IOException {
 
     if (checksumType == ChecksumType.NULL) {
-      return; // No checkums for this block.
+      return; // No checksum for this block.
     }
 
-    Checksum checksum = checksumType.getChecksumObject();
-    int bytesLeft = endOffset - startOffset;
-    int chunkNum = 0;
+    DataChecksum checksum = DataChecksum.newDataChecksum(
+        DataChecksum.Type.valueOf(checksumType.getCode()), bytesPerChecksum);
 
-    while (bytesLeft > 0) {
-      // generate the checksum for one chunk
-      checksum.reset();
-      int count = Math.min(bytesLeft, bytesPerChecksum);
-      checksum.update(indata, startOffset, count);
-
-      // write the checksum value to the output buffer.
-      int cksumValue = (int)checksum.getValue();
-      outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
-      chunkNum++;
-      startOffset += count;
-      bytesLeft -= count;
-    }
+    checksum.calculateChunkedSums(
+        ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
+        ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
   }
 
   /**
@@ -98,7 +90,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(Path path, HFileBlock block, 
+  static boolean validateBlockChecksum(Path path, HFileBlock block,
     byte[] data, int hdrSize) throws IOException {
 
     // If this is an older version of the block that does not have
@@ -117,65 +109,32 @@
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
     if (cktype == ChecksumType.NULL) {
-      return true; // No checkums validations needed for this block.
+      return true; // No checksum validations needed for this block.
     }
-    Checksum checksumObject = cktype.getChecksumObject();
-    checksumObject.reset();
 
     // read in the stored value of the checksum size from the header.
     int bytesPerChecksum = block.getBytesPerChecksum();
 
-    // bytesPerChecksum is always larger than the size of the header
-    if (bytesPerChecksum < hdrSize) {
-      String msg = "Unsupported value of bytesPerChecksum. " +
-                   " Minimum is " + hdrSize +
-                   " but the configured value is " + bytesPerChecksum;
-      HFile.LOG.warn(msg);
-      return false; // cannot happen case, unable to verify checksum
-    }
-    // Extract the header and compute checksum for the header.
-    ByteBuffer hdr = block.getBufferWithHeader();
-    if (hdr.hasArray()) {
-      checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
-    } else {
-      checksumObject.update(ByteBufferUtils.toBytes(hdr, 0, hdrSize), 0, hdrSize);
+    DataChecksum dataChecksum = DataChecksum.newDataChecksum(
+        DataChecksum.Type.valueOf(cktype.getCode()), bytesPerChecksum);
+    assert dataChecksum != null;
+    int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("length of data = " + data.length
+          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
+          + " checksum type = " + cktype.getName()
+          + " file = " + path.toString()
+          + " header size = " + hdrSize
+          + " bytesPerChecksum = " + bytesPerChecksum);
     }
-
-    int off = hdrSize;
-    int consumed = hdrSize;
-    int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
-    int cksumOffset = block.getOnDiskDataSizeWithHeader();
-
-    // validate each chunk
-    while (bytesLeft > 0) {
-      int thisChunkSize = bytesPerChecksum - consumed;
-      int count = Math.min(bytesLeft, thisChunkSize);
-      checksumObject.update(data, off, count);
-
-      int storedChecksum = Bytes.toInt(data, cksumOffset);
-      if (storedChecksum != (int)checksumObject.getValue()) {
-        String msg = "File " + path +
-            " Stored checksum value of " + storedChecksum +
-            " at offset " + cksumOffset +
-            " does not match computed checksum " +
-            checksumObject.getValue() +
-            ", total data size " + data.length +
-            " Checksum data range offset " + off + " len " + count +
-            HFileBlock.toStringHeader(block.getBufferReadOnly());
-        HFile.LOG.warn(msg);
-        if (generateExceptions) {
-          throw new IOException(msg); // this is only for unit tests
-        } else {
-          return false; // checksum validation failure
-        }
-      }
-      cksumOffset += HFileBlock.CHECKSUM_SIZE;
-      bytesLeft -= count;
-      off += count;
-      consumed = 0;
-      checksumObject.reset();
+    try {
+      dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
+          ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
+          path.toString(), 0);
+    } catch (ChecksumException e) {
+      return false;
     }
-    return true;  // checksum is valid
+    return true; // checksum is valid
   }
 
   /**
-- 
2.3.2 (Apple Git-55)