From ba8077b174f58a6bb0212fafeb78e8f0c62b1f15 Mon Sep 17 00:00:00 2001 From: mbautin Date: Tue, 15 Nov 2011 13:48:24 -0800 Subject: [PATCH] Delta encoding --- .../org/apache/hadoop/hbase/HColumnDescriptor.java | 93 +++- .../java/org/apache/hadoop/hbase/HConstants.java | 4 + .../java/org/apache/hadoop/hbase/KeyValue.java | 191 +++++-- .../hadoop/hbase/io/HalfStoreFileReader.java | 7 +- .../io/encoding/BufferedDataBlockEncoder.java | 290 ++++++++++ .../hadoop/hbase/io/encoding/CompressionState.java | 112 ++++ .../hbase/io/encoding/CopyKeyDataBlockEncoder.java | 95 ++++ .../hadoop/hbase/io/encoding/DataBlockEncoder.java | 140 +++++ .../hbase/io/encoding/DataBlockEncoding.java | 180 ++++++ .../hbase/io/encoding/DiffKeyDeltaEncoder.java | 548 ++++++++++++++++++ .../hadoop/hbase/io/encoding/EncodedDataBlock.java | 221 ++++++++ .../encoding/EncoderBufferTooSmallException.java | 28 + .../hbase/io/encoding/FastDiffDeltaEncoder.java | 522 +++++++++++++++++ .../hbase/io/encoding/PrefixKeyDeltaEncoder.java | 195 +++++++ .../hadoop/hbase/io/hfile/AbstractHFileReader.java | 41 ++- .../hadoop/hbase/io/hfile/AbstractHFileWriter.java | 12 +- .../hadoop/hbase/io/hfile/BlockCacheKey.java | 30 +- .../apache/hadoop/hbase/io/hfile/BlockType.java | 19 + .../org/apache/hadoop/hbase/io/hfile/HFile.java | 34 +- .../apache/hadoop/hbase/io/hfile/HFileBlock.java | 318 ++++++++---- .../hadoop/hbase/io/hfile/HFileBlockIndex.java | 9 +- .../hbase/io/hfile/HFileDataBlockEncoder.java | 76 +++ .../hbase/io/hfile/HFileDataBlockEncoderImpl.java | 202 +++++++ .../hadoop/hbase/io/hfile/HFilePrettyPrinter.java | 2 + .../hadoop/hbase/io/hfile/HFileReaderV1.java | 199 ++++---- .../hadoop/hbase/io/hfile/HFileReaderV2.java | 514 ++++++++++++------ .../hadoop/hbase/io/hfile/HFileWriterV1.java | 48 +- .../hadoop/hbase/io/hfile/HFileWriterV2.java | 66 ++- .../hbase/io/hfile/NoOpDataBlockEncoder.java | 75 +++ .../hbase/mapreduce/LoadIncrementalHFiles.java | 12 +- .../hbase/regionserver/CompactSplitThread.java | 2 +- .../apache/hadoop/hbase/regionserver/HRegion.java | 2 +- .../apache/hadoop/hbase/regionserver/MemStore.java | 3 + .../apache/hadoop/hbase/regionserver/Store.java | 50 ++- .../hadoop/hbase/regionserver/StoreFile.java | 89 ++- .../hbase/regionserver/StoreFileScanner.java | 7 +- .../regionserver/metrics/SchemaConfigured.java | 9 + .../apache/hadoop/hbase/util/ByteBufferUtils.java | 494 +++++++++++++++++ src/main/ruby/hbase/admin.rb | 2 + .../hbase/BROKE_TODO_FIX_TestAcidGuarantees.java | 2 +- .../org/apache/hadoop/hbase/HBaseTestCase.java | 26 +- .../apache/hadoop/hbase/HBaseTestingUtility.java | 12 +- .../hadoop/hbase/HFilePerformanceEvaluation.java | 5 +- .../apache/hadoop/hbase/TestAcidGuarantees.java | 2 +- .../org/apache/hadoop/hbase/client/TestAdmin.java | 2 +- .../hadoop/hbase/client/TestFromClientSide.java | 2 + .../hadoop/hbase/io/TestHalfStoreFileReader.java | 5 +- .../org/apache/hadoop/hbase/io/TestHeapSize.java | 22 +- .../hbase/io/encoding/RedundantKVGenerator.java | 290 ++++++++++ .../io/encoding/TestBufferedDataBlockEncoder.java | 43 ++ .../hbase/io/encoding/TestChangingEncoding.java | 215 +++++++ .../hbase/io/encoding/TestDataBlockEncoders.java | 346 ++++++++++++ .../hbase/io/encoding/TestEncodedSeekers.java | 123 ++++ .../hadoop/hbase/io/hfile/CacheTestUtils.java | 13 +- .../hadoop/hbase/io/hfile/TestCacheOnWrite.java | 113 +++-- .../apache/hadoop/hbase/io/hfile/TestHFile.java | 4 +- .../hadoop/hbase/io/hfile/TestHFileBlock.java | 243 ++++++++- .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java | 18 +- 
.../hbase/io/hfile/TestHFileDataBlockEncoder.java | 188 +++++++ .../hbase/io/hfile/TestHFilePerformance.java | 2 +- .../hadoop/hbase/io/hfile/TestHFileSeek.java | 2 +- .../hadoop/hbase/io/hfile/TestHFileWriterV2.java | 18 +- .../hadoop/hbase/mapreduce/TestImportExport.java | 4 + .../hbase/regionserver/CreateRandomStoreFile.java | 4 +- .../hbase/regionserver/DataBlockEncodingTool.java | 585 ++++++++++++++++++++ .../regionserver/EncodedSeekPerformanceTest.java | 171 ++++++ .../hbase/regionserver/TestCompactSelection.java | 6 +- .../hadoop/hbase/regionserver/TestCompaction.java | 76 ++- .../regionserver/TestCompoundBloomFilter.java | 6 +- .../hbase/regionserver/TestFSErrorsExposed.java | 8 +- .../hbase/regionserver/TestMultiColumnScanner.java | 23 +- .../hbase/regionserver/TestScanWithBloomError.java | 4 +- .../hbase/regionserver/TestSeekOptimizations.java | 3 +- .../hadoop/hbase/regionserver/TestStoreFile.java | 118 +++-- .../hbase/regionserver/wal/TestLogRolling.java | 3 +- .../hbase/regionserver/wal/TestWALReplay.java | 2 +- .../hadoop/hbase/util/LoadTestKVGenerator.java | 2 +- .../org/apache/hadoop/hbase/util/LoadTestTool.java | 70 ++- .../hadoop/hbase/util/MultiThreadedReader.java | 2 +- .../hadoop/hbase/util/MultiThreadedWriter.java | 4 +- .../apache/hadoop/hbase/util/RestartMetaTest.java | 5 +- .../hadoop/hbase/util/TestByteBufferUtils.java | 356 ++++++++++++ .../hadoop/hbase/util/TestLoadTestKVGenerator.java | 2 +- .../hbase/util/TestMiniClusterLoadEncoded.java | 61 ++ .../hbase/util/TestMiniClusterLoadParallel.java | 14 +- .../hbase/util/TestMiniClusterLoadSequential.java | 41 ++- 86 files changed, 7488 insertions(+), 719 deletions(-) create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/EncoderBufferTooSmallException.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java create mode 100644 src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java create mode 100644 src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java create mode 100644 src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java create mode 100644 src/test/java/org/apache/hadoop/hbase/io/encoding/TestBufferedDataBlockEncoder.java create mode 100644 src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java create mode 100644 src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java create mode 100644 src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java create mode 100644 
src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java create mode 100644 src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java create mode 100644 src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java create mode 100644 src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java create mode 100644 src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadEncoded.java diff --git src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java index 7910c4a..aa9f55c 100644 --- src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java +++ src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -53,10 +54,16 @@ public class HColumnDescriptor implements WritableComparable // Version 6 adds metadata as a map where keys and values are byte[]. // Version 7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217) // Version 8 -- reintroduction of bloom filters, changed from boolean to enum - private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)8; + // Version 9 -- add data block encoding + private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 9; + // These constants are used as FileInfo keys public static final String COMPRESSION = "COMPRESSION"; public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT"; + public static final String ENCODE_ON_DISK = + "ENCODE_ON_DISK"; + public static final String DATA_BLOCK_ENCODING = + "DATA_BLOCK_ENCODING"; public static final String BLOCKCACHE = "BLOCKCACHE"; /** @@ -79,6 +86,19 @@ public class HColumnDescriptor implements WritableComparable */ public static final String DEFAULT_COMPRESSION = Compression.Algorithm.NONE.getName(); + + /** + * Default value of the flag that enables data block encoding on disk, as + * opposed to encoding in cache only. We encode blocks everywhere by default, + * as long as {@link #DATA_BLOCK_ENCODING} is not NONE. + */ + public static final boolean DEFAULT_ENCODE_ON_DISK = true; + + /** + * Default data block encoding algorithm in cache. + */ + public static final String DEFAULT_DATA_BLOCK_ENCODING = + DataBlockEncoding.NONE.toString(); /** * Default number of versions of a record to keep. @@ -143,6 +163,10 @@ public class HColumnDescriptor implements WritableComparable DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY)); DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE)); DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED)); + DEFAULT_VALUES.put(ENCODE_ON_DISK, + String.valueOf(DEFAULT_ENCODE_ON_DISK)); + DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, + String.valueOf(DEFAULT_DATA_BLOCK_ENCODING)); } // Column family name @@ -241,7 +265,7 @@ public class HColumnDescriptor implements WritableComparable * cache * @param blockCacheEnabled If true, MapFile blocks should be cached * @param blocksize Block size to use when writing out storefiles. Use - * smaller blocksizes for faster random-access at expense of larger indices + * smaller block sizes for faster random-access at expense of larger indices * (more memory consumption). 
Default is usually 64k. * @param timeToLive Time-to-live of cell contents, in seconds * (use HConstants.FOREVER for unlimited TTL) @@ -258,8 +282,9 @@ public class HColumnDescriptor implements WritableComparable final boolean blockCacheEnabled, final int blocksize, final int timeToLive, final String bloomFilter, final int scope) { this(familyName, DEFAULT_MIN_VERSIONS, maxVersions, DEFAULT_KEEP_DELETED, - compression, inMemory, blockCacheEnabled, blocksize, timeToLive, - bloomFilter, scope); + compression, DEFAULT_ENCODE_ON_DISK, DEFAULT_DATA_BLOCK_ENCODING, + inMemory, blockCacheEnabled, blocksize, timeToLive, bloomFilter, + scope); } /** @@ -271,6 +296,9 @@ public class HColumnDescriptor implements WritableComparable * @param keepDeletedCells Whether to retain deleted cells until they expire * up to maxVersions versions. * @param compression Compression type + * @param encodeOnDisk whether to use the specified data block encoding + * on disk. If false, the encoding will be used in cache only. + * @param dataBlockEncoding data block encoding * @param inMemory If true, column data should be kept in an HRegionServer's * cache * @param blockCacheEnabled If true, MapFile blocks should be cached @@ -289,7 +317,8 @@ public class HColumnDescriptor implements WritableComparable */ public HColumnDescriptor(final byte[] familyName, final int minVersions, final int maxVersions, final boolean keepDeletedCells, - final String compression, final boolean inMemory, + final String compression, final boolean encodeOnDisk, + final String dataBlockEncoding, final boolean inMemory, final boolean blockCacheEnabled, final int blocksize, final int timeToLive, final String bloomFilter, final int scope) { isLegalFamilyName(familyName); @@ -319,6 +348,9 @@ public class HColumnDescriptor implements WritableComparable setTimeToLive(timeToLive); setCompressionType(Compression.Algorithm. valueOf(compression.toUpperCase())); + setEncodeOnDisk(encodeOnDisk); + setDataBlockEncoding(DataBlockEncoding. + valueOf(dataBlockEncoding.toUpperCase())); setBloomFilterType(StoreFile.BloomType. valueOf(bloomFilter.toUpperCase())); setBlocksize(blocksize); @@ -496,6 +528,57 @@ public class HColumnDescriptor implements WritableComparable setValue(COMPRESSION, compressionType); } + /** @return data block encoding algorithm used on disk */ + public DataBlockEncoding getDataBlockEncodingOnDisk() { + String encodeOnDiskStr = getValue(ENCODE_ON_DISK); + boolean encodeOnDisk; + if (encodeOnDiskStr == null) { + encodeOnDisk = DEFAULT_ENCODE_ON_DISK; + } else { + encodeOnDisk = Boolean.valueOf(encodeOnDiskStr); + } + + if (!encodeOnDisk) { + // No encoding on disk. + return DataBlockEncoding.NONE; + } + return getDataBlockEncoding(); + } + + /** + * Set the flag indicating that we only want to encode data block in cache + * but not on disk. + */ + public void setEncodeOnDisk(boolean encodeOnDisk) { + setValue(ENCODE_ON_DISK, String.valueOf(encodeOnDisk)); + } + + /** + * @return the data block encoding algorithm used in block cache and + * optionally on disk + */ + public DataBlockEncoding getDataBlockEncoding() { + String type = getValue(DATA_BLOCK_ENCODING); + if (type == null) { + type = DEFAULT_DATA_BLOCK_ENCODING; + } + return DataBlockEncoding.valueOf(type); + } + + /** + * Set data block encoding algorithm used in block cache. + * @param type What kind of data block encoding will be used. 
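+ * <p>
+ * A minimal usage sketch (illustrative only; the family name {@code "cf"}
+ * below is just an example):
+ * <pre>
+ * HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("cf"));
+ * hcd.setDataBlockEncoding(DataBlockEncoding.PREFIX);
+ * // keep blocks encoded in the block cache only, not on disk
+ * hcd.setEncodeOnDisk(false);
+ * </pre>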
+ */ + public void setDataBlockEncoding(DataBlockEncoding type) { + String name; + if (type != null) { + name = type.toString(); + } else { + name = DataBlockEncoding.NONE.toString(); + } + setValue(DATA_BLOCK_ENCODING, name); + } + /** * @return Compression type setting. */ diff --git src/main/java/org/apache/hadoop/hbase/HConstants.java src/main/java/org/apache/hadoop/hbase/HConstants.java index 5120a3c..98f2454 100644 --- src/main/java/org/apache/hadoop/hbase/HConstants.java +++ src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -221,6 +221,10 @@ public final class HConstants { /** Default maximum file size */ public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024; + /** Conf key for the memstore size at which we flush the memstore */ + public static final String HREGION_MEMSTORE_FLUSH_SIZE = + "hbase.hregion.memstore.flush.size"; + /** Default size of a reservation block */ public static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5; diff --git src/main/java/org/apache/hadoop/hbase/KeyValue.java src/main/java/org/apache/hadoop/hbase/KeyValue.java index be7e2d8..98f9345 100644 --- src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -130,16 +130,27 @@ public class KeyValue implements Writable, HeapSize { return COMPARATOR.getRawComparator(); } + /** Size of the key length field in bytes*/ + public static final int KEY_LENGTH_SIZE = Bytes.SIZEOF_INT; + + /** Size of the key type field in bytes */ + public static final int TYPE_SIZE = Bytes.SIZEOF_BYTE; + + /** Size of the row length field in bytes */ + public static final int ROW_LENGTH_SIZE = Bytes.SIZEOF_SHORT; + + /** Size of the family length field in bytes */ + public static final int FAMILY_LENGTH_SIZE = Bytes.SIZEOF_BYTE; + + /** Size of the timestamp field in bytes */ + public static final int TIMESTAMP_SIZE = Bytes.SIZEOF_LONG; + // Size of the timestamp and type byte on end of a key -- a long + a byte. - public static final int TIMESTAMP_TYPE_SIZE = - Bytes.SIZEOF_LONG /* timestamp */ + - Bytes.SIZEOF_BYTE /*keytype*/; + public static final int TIMESTAMP_TYPE_SIZE = TIMESTAMP_SIZE + TYPE_SIZE; // Size of the length shorts and bytes in key. - public static final int KEY_INFRASTRUCTURE_SIZE = - Bytes.SIZEOF_SHORT /*rowlength*/ + - Bytes.SIZEOF_BYTE /*columnfamilylength*/ + - TIMESTAMP_TYPE_SIZE; + public static final int KEY_INFRASTRUCTURE_SIZE = ROW_LENGTH_SIZE + + FAMILY_LENGTH_SIZE + TIMESTAMP_TYPE_SIZE; // How far into the key the row starts at. First thing to read is the short // that says how long the row is. @@ -701,10 +712,10 @@ public class KeyValue implements Writable, HeapSize { */ /** * Produces a string map for this key/value pair. Useful for programmatic use - * and manipulation of the data stored in an HLogKey, for example, printing - * as JSON. Values are left out due to their tendency to be large. If needed, + * and manipulation of the data stored in an HLogKey, for example, printing + * as JSON. Values are left out due to their tendency to be large. If needed, * they can be added manually. 
- * + * * @return the Map containing data from this key */ public Map toStringMap() { @@ -730,13 +741,21 @@ public class KeyValue implements Writable, HeapSize { Bytes.toStringBinary(b, columnoffset + familylength, columnlength - familylength); long timestamp = Bytes.toLong(b, o + (l - TIMESTAMP_TYPE_SIZE)); + String timestampStr = humanReadableTimestamp(timestamp); byte type = b[o + l - 1]; -// return row + "/" + family + -// (family != null && family.length() > 0? COLUMN_FAMILY_DELIMITER: "") + -// qualifier + "/" + timestamp + "/" + Type.codeToType(type); return row + "/" + family + (family != null && family.length() > 0? ":" :"") + - qualifier + "/" + timestamp + "/" + Type.codeToType(type); + qualifier + "/" + timestampStr + "/" + Type.codeToType(type); + } + + public static String humanReadableTimestamp(final long timestamp) { + if (timestamp == HConstants.LATEST_TIMESTAMP) { + return "LATEST_TIMESTAMP"; + } + if (timestamp == HConstants.OLDEST_TIMESTAMP) { + return "OLDEST_TIMESTAMP"; + } + return String.valueOf(timestamp); } //--------------------------------------------------------------------------- @@ -1322,7 +1341,7 @@ public class KeyValue implements Writable, HeapSize { // Rebuild as: <0:4> int dataLen = lenAsVal? Bytes.SIZEOF_INT : 0; byte [] newBuffer = new byte[getKeyLength() + (2 * Bytes.SIZEOF_INT) + dataLen]; - System.arraycopy(this.bytes, this.offset, newBuffer, 0, + System.arraycopy(this.bytes, this.offset, newBuffer, 0, Math.min(newBuffer.length,this.length)); Bytes.putInt(newBuffer, Bytes.SIZEOF_INT, dataLen); if (lenAsVal) { @@ -1393,7 +1412,7 @@ public class KeyValue implements Writable, HeapSize { } /** - * This function is only used in Meta key comparisons so its error message + * This function is only used in Meta key comparisons so its error message * is specific for meta key errors. */ static int getRequiredDelimiterInReverse(final byte [] b, @@ -1561,7 +1580,7 @@ public class KeyValue implements Writable, HeapSize { return getRawComparator().compareRows(left, loffset, llength, right, roffset, rlength); } - + public int compareColumns(final KeyValue left, final byte [] right, final int roffset, final int rlength, final int rfamilyoffset) { int offset = left.getFamilyOffset(); @@ -1595,7 +1614,7 @@ public class KeyValue implements Writable, HeapSize { short lrowlength = left.getRowLength(); short rrowlength = right.getRowLength(); // TsOffset = end of column data. just comparing Row+CF length of each - return ((left.getTimestampOffset() - left.getOffset()) == + return ((left.getTimestampOffset() - left.getOffset()) == (right.getTimestampOffset() - right.getOffset())) && matchingRows(left, lrowlength, right, rrowlength) && compareColumns(left, lrowlength, right, rrowlength) == 0; @@ -1881,7 +1900,7 @@ public class KeyValue implements Writable, HeapSize { final int l) { byte [] newb = new byte[b.length + ROW_OFFSET]; System.arraycopy(b, o, newb, ROW_OFFSET, l); - Bytes.putInt(newb, 0, b.length); + Bytes.putInt(newb, 0, l); Bytes.putInt(newb, Bytes.SIZEOF_INT, 0); return new KeyValue(newb); } @@ -2004,9 +2023,23 @@ public class KeyValue implements Writable, HeapSize { } /** + * Avoids redundant comparisons for better performance. + */ + public static interface SamePrefixComparator { + /** + * Compare two keys assuming that the first n bytes are the same. + * @param commonPrefix How many bytes are the same. 
+ */ + public int compareIgnoringPrefix(int commonPrefix, + T left, int loffset, int llength, + T right, int roffset, int rlength); + } + + /** * Compare key portion of a {@link KeyValue}. */ - public static class KeyComparator implements RawComparator { + public static class KeyComparator + implements RawComparator, SamePrefixComparator { volatile boolean ignoreTimestamp = false; volatile boolean ignoreType = false; @@ -2016,45 +2049,123 @@ public class KeyValue implements Writable, HeapSize { short lrowlength = Bytes.toShort(left, loffset); short rrowlength = Bytes.toShort(right, roffset); int compare = compareRows(left, loffset + Bytes.SIZEOF_SHORT, - lrowlength, - right, roffset + Bytes.SIZEOF_SHORT, rrowlength); + lrowlength, right, roffset + Bytes.SIZEOF_SHORT, rrowlength); if (compare != 0) { return compare; } - // Compare column family. Start compare past row and family length. - int lcolumnoffset = Bytes.SIZEOF_SHORT + lrowlength + 1 + loffset; - int rcolumnoffset = Bytes.SIZEOF_SHORT + rrowlength + 1 + roffset; + // Compare the rest of the two KVs without making any assumptions about + // the common prefix. This function will not compare rows anyway, so we + // don't need to tell it that the common prefix includes the row. + return compareWithoutRow(0, left, loffset, llength, right, roffset, + rlength, rrowlength); + } + + /** + * Compare the two key-values, ignoring the prefix of the given length + * that is known to be the same between the two. + * @param commonPrefix the prefix length to ignore + */ + @Override + public int compareIgnoringPrefix(int commonPrefix, byte[] left, + int loffset, int llength, byte[] right, int roffset, int rlength) { + // Compare row + short lrowlength = Bytes.toShort(left, loffset); + short rrowlength; + + int comparisonResult = 0; + if (commonPrefix < ROW_LENGTH_SIZE) { + // almost nothing in common + rrowlength = Bytes.toShort(right, roffset); + comparisonResult = compareRows(left, loffset + ROW_LENGTH_SIZE, + lrowlength, right, roffset + ROW_LENGTH_SIZE, rrowlength); + } else { // the row length is the same + rrowlength = lrowlength; + if (commonPrefix < ROW_LENGTH_SIZE + rrowlength) { + // The rows are not the same. Exclude the common prefix and compare + // the rest of the two rows. + int common = commonPrefix - ROW_LENGTH_SIZE; + comparisonResult = compareRows( + left, loffset + common + ROW_LENGTH_SIZE, lrowlength - common, + right, roffset + common + ROW_LENGTH_SIZE, rrowlength - common); + } + } + if (comparisonResult != 0) { + return comparisonResult; + } + + assert lrowlength == rrowlength; + + return compareWithoutRow(commonPrefix, left, loffset, llength, right, + roffset, rlength, lrowlength); + } + + /** + * Compare column, timestamp, and key type (everything except the row). + * This method is used both in the normal comparator and the "same-prefix" + * comparator. Note that we are assuming that row portions of both KVs have + * already been parsed and found identical, and we don't validate that + * assumption here. + * @param commonPrefix the length of the common prefix of the two + * key-values being compared, including row length and row + */ + private int compareWithoutRow(int commonPrefix, byte[] left, int loffset, + int llength, byte[] right, int roffset, int rlength, short rowlength) { + // Compare column family. Start comparing past row and family length. 
+ int lcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + + rowlength + loffset; + int rcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + + rowlength + roffset; int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE - - (lcolumnoffset - loffset); + (lcolumnoffset - loffset); int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE - - (rcolumnoffset - roffset); + (rcolumnoffset - roffset); - // if row matches, and no column in the 'left' AND put type is 'minimum', + // If row matches, and no column in the 'left' AND put type is 'minimum', // then return that left is larger than right. - // This supports 'last key on a row' - the magic is if there is no column in the - // left operand, and the left operand has a type of '0' - magical value, - // then we say the left is bigger. This will let us seek to the last key in - // a row. + // This supports 'last key on a row' - the magic is if there is no column + // in the left operand, and the left operand has a type of '0' - magical + // value, then we say the left is bigger. This will let us seek to the + // last key in a row. byte ltype = left[loffset + (llength - 1)]; byte rtype = right[roffset + (rlength - 1)]; + // If the column is not specified, the "minimum" key type appears the + // latest in the sorted order, regardless of the timestamp. This is used + // for specifying the last key/value in a given row, because there is no + // "lexicographically last column" (it would be infinitely long). The + // "maximum" key type does not need this behavior. if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) { - return 1; // left is bigger. + // left is "bigger", i.e. it appears later in the sorted order + return 1; } if (rcolumnlength == 0 && rtype == Type.Minimum.getCode()) { return -1; } - // TODO the family and qualifier should be compared separately - compare = Bytes.compareTo(left, lcolumnoffset, lcolumnlength, right, - rcolumnoffset, rcolumnlength); - if (compare != 0) { - return compare; + int common = 0; + if (commonPrefix > 0) { + common = Math.max(0, commonPrefix - + rowlength - ROW_LENGTH_SIZE - FAMILY_LENGTH_SIZE); + common = Math.min(common, Math.min(lcolumnlength, rcolumnlength)); + } + + final int comparisonResult = Bytes.compareTo( + left, lcolumnoffset + common, lcolumnlength - common, + right, rcolumnoffset + common, rcolumnlength - common); + if (comparisonResult != 0) { + return comparisonResult; } + return compareTimestampAndType(left, loffset, llength, right, roffset, + rlength, ltype, rtype); + } + + private int compareTimestampAndType(byte[] left, int loffset, int llength, + byte[] right, int roffset, int rlength, byte ltype, byte rtype) { + int compare; if (!this.ignoreTimestamp) { // Get timestamps. long ltimestamp = Bytes.toLong(left, @@ -2069,7 +2180,9 @@ public class KeyValue implements Writable, HeapSize { if (!this.ignoreType) { // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. 
return (0xff & rtype) - (0xff & ltype); } return 0; diff --git src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java index 23bbc2b..c2ceb1e 100644 --- src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java +++ src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -60,9 +61,9 @@ public class HalfStoreFileReader extends StoreFile.Reader { * @throws IOException */ public HalfStoreFileReader(final FileSystem fs, final Path p, - final CacheConfig cacheConf, final Reference r) - throws IOException { - super(fs, p, cacheConf); + final CacheConfig cacheConf, final Reference r, + DataBlockEncoding preferredEncodingInCache) throws IOException { + super(fs, p, cacheConf, preferredEncodingInCache); // This is not actual midkey for this half-file; its just border // around which we split top and bottom. Have to look in files to find // actual last and first keys for bottom and top halves. Half-files don't diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java new file mode 100644 index 0000000..56a6f82 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.SamePrefixComparator; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.io.WritableUtils; + +/** + * Base class for all data block encoders that use a buffer. 
+ */ +abstract class BufferedDataBlockEncoder implements DataBlockEncoder { + + private static int INITIAL_KEY_BUFFER_SIZE = 512; + + @Override + public ByteBuffer uncompressKeyValues(DataInputStream source, + boolean includesMemstoreTS) throws IOException { + return uncompressKeyValues(source, 0, 0, includesMemstoreTS); + } + + protected static class SeekerState { + protected int valueOffset = -1; + protected int keyLength; + protected int valueLength; + protected int lastCommonPrefix; + + /** We need to store a copy of the key. */ + protected byte[] keyBuffer = new byte[INITIAL_KEY_BUFFER_SIZE]; + + protected long memstoreTS; + protected int nextKvOffset; + + protected boolean isValid() { + return valueOffset != -1; + } + + protected void invalidate() { + valueOffset = -1; + } + + protected void ensureSpaceForKey() { + if (keyLength > keyBuffer.length) { + // rare case, but we need to handle arbitrary length of key + int newKeyBufferLength = Math.max(keyBuffer.length, 1) * 2; + while (keyLength > newKeyBufferLength) { + newKeyBufferLength *= 2; + } + byte[] newKeyBuffer = new byte[newKeyBufferLength]; + System.arraycopy(keyBuffer, 0, newKeyBuffer, 0, keyBuffer.length); + keyBuffer = newKeyBuffer; + } + } + + /** Copy all fields from another instance */ + protected void copyFrom(SeekerState other) { + if (keyBuffer.length != other.keyBuffer.length) { + keyBuffer = other.keyBuffer.clone(); + } else if (!isValid()) { + // Note: we can only call isValid before we override our state. + System.arraycopy(other.keyBuffer, 0, keyBuffer, 0, + other.keyLength); + } else { + // don't copy the common prefix between this key and the previous one + System.arraycopy(other.keyBuffer, other.lastCommonPrefix, + keyBuffer, other.lastCommonPrefix, other.keyLength + - other.lastCommonPrefix); + } + + valueOffset = other.valueOffset; + keyLength = other.keyLength; + valueLength = other.valueLength; + lastCommonPrefix = other.lastCommonPrefix; + nextKvOffset = other.nextKvOffset; + memstoreTS = other.memstoreTS; + } + + } + + protected abstract static class + BufferedEncodedSeeker + implements EncodedSeeker { + + protected final RawComparator comparator; + protected final SamePrefixComparator samePrefixComparator; + protected ByteBuffer currentBuffer; + protected STATE current = createSeekerState(); // always valid + protected STATE previous = createSeekerState(); // may not be valid + + @SuppressWarnings("unchecked") + public BufferedEncodedSeeker(RawComparator comparator) { + this.comparator = comparator; + if (comparator instanceof SamePrefixComparator) { + this.samePrefixComparator = (SamePrefixComparator) comparator; + } else { + this.samePrefixComparator = null; + } + } + + @Override + public void setCurrentBuffer(ByteBuffer buffer) { + currentBuffer = buffer; + decodeFirst(); + previous.invalidate(); + } + + @Override + public ByteBuffer getKey() { + ByteBuffer keyBuffer = ByteBuffer.allocate(current.keyLength); + keyBuffer.put(current.keyBuffer, 0, current.keyLength); + return keyBuffer; + } + + @Override + public ByteBuffer getValue() { + return ByteBuffer.wrap(currentBuffer.array(), + currentBuffer.arrayOffset() + current.valueOffset, + current.valueLength); + } + + @Override + public ByteBuffer getKeyValueBuffer() { + ByteBuffer kvBuffer = ByteBuffer.allocate( + 2 * Bytes.SIZEOF_INT + current.keyLength + current.valueLength); + kvBuffer.putInt(current.keyLength); + kvBuffer.putInt(current.valueLength); + kvBuffer.put(current.keyBuffer, 0, current.keyLength); + kvBuffer.put(currentBuffer.array(), + 
currentBuffer.arrayOffset() + current.valueOffset, + current.valueLength); + return kvBuffer; + } + + @Override + public KeyValue getKeyValue() { + ByteBuffer kvBuf = getKeyValueBuffer(); + KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset()); + kv.setMemstoreTS(current.memstoreTS); + return kv; + } + + @Override + public void rewind() { + currentBuffer.rewind(); + decodeFirst(); + previous.invalidate(); + } + + @Override + public boolean next() { + if (!currentBuffer.hasRemaining()) { + return false; + } + decodeNext(); + previous.invalidate(); + return true; + } + + @Override + public int blockSeekTo(byte[] key, int offset, int length, + boolean seekBefore) { + int commonPrefix = 0; + previous.invalidate(); + do { + int comp; + if (samePrefixComparator != null) { + commonPrefix = Math.min(commonPrefix, current.lastCommonPrefix); + + // extend commonPrefix + commonPrefix += ByteBufferUtils.findCommonPrefix( + key, offset + commonPrefix, length - commonPrefix, + current.keyBuffer, commonPrefix, + current.keyLength - commonPrefix); + + comp = samePrefixComparator.compareIgnoringPrefix(commonPrefix, key, + offset, length, current.keyBuffer, 0, current.keyLength); + } else { + comp = comparator.compare(key, offset, length, + current.keyBuffer, 0, current.keyLength); + } + + if (comp == 0) { // exact match + if (seekBefore) { + moveToPrevious(); + return 1; + } + return 0; + } + + if (comp < 0) { // already too large, check previous + if (previous.isValid()) { + moveToPrevious(); + } + return 1; + } + + // move to next, if more data is available + if (currentBuffer.hasRemaining()) { + previous.copyFrom(current); + decodeNext(); + } else { + break; + } + } while (true); + + // we hit end of file, not exact match + return 1; + } + + private void moveToPrevious() { + if (!previous.isValid()) { + throw new IllegalStateException( + "Can move back only once and not in first key in the block."); + } + + STATE tmp = previous; + previous = current; + current = tmp; + + // move after last key value + currentBuffer.position(current.nextKvOffset); + + previous.invalidate(); + } + + @SuppressWarnings("unchecked") + protected STATE createSeekerState() { + // This will fail for non-default seeker state if the subclass does not + // override this method. + return (STATE) new SeekerState(); + } + + abstract protected void decodeFirst(); + abstract protected void decodeNext(); + } + + protected final void afterEncodingKeyValue(ByteBuffer in, + DataOutputStream out, boolean includesMemstoreTS) { + if (includesMemstoreTS) { + // Copy memstore timestamp from the byte buffer to the output stream. + long memstoreTS = -1; + try { + memstoreTS = ByteBufferUtils.readVLong(in); + WritableUtils.writeVLong(out, memstoreTS); + } catch (IOException ex) { + throw new RuntimeException("Unable to copy memstore timestamp " + + memstoreTS + " after encoding a key/value"); + } + } + } + + protected final void afterDecodingKeyValue(DataInputStream source, + ByteBuffer dest, boolean includesMemstoreTS) { + if (includesMemstoreTS) { + long memstoreTS = -1; + try { + // Copy memstore timestamp from the data input stream to the byte + // buffer. 
+ memstoreTS = WritableUtils.readVLong(source); + ByteBufferUtils.writeVLong(dest, memstoreTS); + } catch (IOException ex) { + throw new RuntimeException("Unable to copy memstore timestamp " + + memstoreTS + " after decoding a key/value"); + } + } + } + +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java new file mode 100644 index 0000000..47d6d49 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteBufferUtils; + +/** + * Stores the state of data block encoder at the beginning of new key. + */ +class CompressionState { + int keyLength; + int valueLength; + + short rowLength; + int prevOffset = FIRST_KEY; + byte familyLength; + int qualifierLength; + byte type; + + final static int FIRST_KEY = -1; + + boolean isFirst() { + return prevOffset == FIRST_KEY; + } + + /** + * Analyze the key and fill the state. + * Uses mark() and reset() in ByteBuffer. + * @param in Buffer at the position where key starts + * @param keyLength Length of key in bytes + * @param valueLength Length of values in bytes + */ + void readKey(ByteBuffer in, int keyLength, int valueLength) { + readKey(in, keyLength, valueLength, 0, null); + } + + /** + * Analyze the key and fill the state assuming we know previous state. + * Uses mark() and reset() in ByteBuffer. 
+ * @param in Buffer at the position where key starts + * @param keyLength Length of key in bytes + * @param valueLength Length of values in bytes + * @param commonPrefix how many first bytes are common with previous KeyValue + * @param previousState State from previous KeyValue + */ + void readKey(ByteBuffer in, int keyLength, int valueLength, + int commonPrefix, CompressionState previousState) { + this.keyLength = keyLength; + this.valueLength = valueLength; + + // fill the state + in.mark(); // mark beginning of key + + if (commonPrefix < KeyValue.ROW_LENGTH_SIZE) { + rowLength = in.getShort(); + ByteBufferUtils.skip(in, rowLength); + + familyLength = in.get(); + + qualifierLength = keyLength - rowLength - familyLength - + KeyValue.KEY_INFRASTRUCTURE_SIZE; + ByteBufferUtils.skip(in, familyLength + qualifierLength); + } else { + rowLength = previousState.rowLength; + familyLength = previousState.familyLength; + qualifierLength = previousState.qualifierLength + + keyLength - previousState.keyLength; + ByteBufferUtils.skip(in, (KeyValue.ROW_LENGTH_SIZE + + KeyValue.FAMILY_LENGTH_SIZE) + + rowLength + familyLength + qualifierLength); + } + + readTimestamp(in); + + type = in.get(); + + in.reset(); + } + + protected void readTimestamp(ByteBuffer in) { + // used in subclasses to add timestamp to state + ByteBufferUtils.skip(in, KeyValue.TIMESTAMP_SIZE); + } + + void copyFrom(CompressionState state) { + keyLength = state.keyLength; + valueLength = state.valueLength; + + rowLength = state.rowLength; + prevOffset = state.prevOffset; + familyLength = state.familyLength; + qualifierLength = state.qualifierLength; + type = state.type; + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java new file mode 100644 index 0000000..7b8a408 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; + +/** + * Just copy data, do not do any kind of compression. Use for comparison and + * benchmarking. 
+ */ +public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder { + @Override + public void compressKeyValues(DataOutputStream out, + ByteBuffer in, boolean includesMemstoreTS) throws IOException { + in.rewind(); + ByteBufferUtils.putInt(out, in.limit()); + ByteBufferUtils.copyToStream(out, in, in.limit()); + } + + @Override + public ByteBuffer uncompressKeyValues(DataInputStream source, + int preserveHeaderLength, int skipLastBytes, boolean includesMemstoreTS) + throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + preserveHeaderLength); + buffer.position(preserveHeaderLength); + ByteBufferUtils.copyFromStream(source, buffer, decompressedSize); + + return buffer; + } + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + int keyLength = block.getInt(Bytes.SIZEOF_INT); + return ByteBuffer.wrap(block.array(), + block.arrayOffset() + 3 * Bytes.SIZEOF_INT, keyLength).slice(); + } + + + @Override + public String toString() { + return CopyKeyDataBlockEncoder.class.getSimpleName(); + } + + @Override + public EncodedSeeker createSeeker(RawComparator comparator, + final boolean includesMemstoreTS) { + return new BufferedEncodedSeeker(comparator) { + @Override + protected void decodeNext() { + current.keyLength = currentBuffer.getInt(); + current.valueLength = currentBuffer.getInt(); + current.ensureSpaceForKey(); + currentBuffer.get(current.keyBuffer, 0, current.keyLength); + current.valueOffset = currentBuffer.position(); + ByteBufferUtils.skip(currentBuffer, current.valueLength); + if (includesMemstoreTS) { + current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); + } else { + current.memstoreTS = 0; + } + current.nextKvOffset = currentBuffer.position(); + } + + @Override + protected void decodeFirst() { + ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT); + current.lastCommonPrefix = 0; + decodeNext(); + } + }; + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java new file mode 100644 index 0000000..e5a2b48 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.io.RawComparator; + +/** + * Encoding of KeyValue. It aims to be fast and efficient using assumptions: + *
+ * <ul>
+ * <li>the KeyValues are stored sorted by key</li>
+ * <li>we know the structure of KeyValue</li>
+ * <li>the values are always iterated forward from the beginning of the block</li>
+ * <li>knowledge of the KeyValue format</li>
+ * </ul>
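+ * <p>
+ * A minimal round-trip sketch (illustrative; assumes {@code buffer} already
+ * holds serialized KeyValues and {@code comparator} is a raw key comparator
+ * such as {@code KeyValue.KEY_COMPARATOR}):
+ * <pre>
+ * DataBlockEncoder encoder = DataBlockEncoding.PREFIX.getEncoder();
+ * ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ * encoder.compressKeyValues(new DataOutputStream(baos), buffer, false);
+ * ByteBuffer encoded = ByteBuffer.wrap(baos.toByteArray());
+ *
+ * EncodedSeeker seeker = encoder.createSeeker(comparator, false);
+ * seeker.setCurrentBuffer(encoded);
+ * do {
+ *   KeyValue kv = seeker.getKeyValue();
+ *   // use kv
+ * } while (seeker.next());
+ * </pre>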
+ * It is designed to work fast enough to be feasible as in memory compression. + */ +public interface DataBlockEncoder { + /** + * Compress KeyValues and write them to output buffer. + * @param out Where to write compressed data. + * @param in Source of KeyValue for compression. + * @param includesMemstoreTS true if including memstore timestamp after every + * key-value pair + * @throws IOException If there is an error writing to output stream. + */ + public void compressKeyValues(DataOutputStream out, + ByteBuffer in, boolean includesMemstoreTS) throws IOException; + + /** + * Uncompress. + * @param source Compressed stream of KeyValues. + * @param includesMemstoreTS true if including memstore timestamp after every + * key-value pair + * @return Uncompressed block of KeyValues. + * @throws IOException If there is an error in source. + */ + public ByteBuffer uncompressKeyValues(DataInputStream source, + boolean includesMemstoreTS) throws IOException; + + /** + * Uncompress. + * @param source Compressed stream of KeyValues. + * @param allocateHeaderLength allocate this many bytes for the header. + * @param skipLastBytes Do not copy n last bytes. + * @param includesMemstoreTS true if including memstore timestamp after every + * key-value pair + * @return Uncompressed block of KeyValues. + * @throws IOException If there is an error in source. + */ + public ByteBuffer uncompressKeyValues(DataInputStream source, + int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS) + throws IOException; + + /** + * Return first key in block. Useful for indexing. + * @param block encoded block we want index, the position will not change + * @return First key in block. + */ + public ByteBuffer getFirstKeyInBlock(ByteBuffer block); + + /** + * Create a HFileBlock seeker which find KeyValues within a block. + * @param comparator what kind of comparison should be used + * @param includesMemstoreTS true if including memstore timestamp after every + * key-value pair + * @return A newly created seeker. + */ + public EncodedSeeker createSeeker(RawComparator comparator, + boolean includesMemstoreTS); + + /** + * An interface which enable to seek while underlying data is encoded. + * + * It works on one HFileBlock, but it is reusable. See + * {@link #setCurrentBuffer(ByteBuffer)}. + */ + public static interface EncodedSeeker { + /** + * Set on which buffer there will be done seeking. + * @param buffer Used for seeking. + */ + public void setCurrentBuffer(ByteBuffer buffer); + + /** @return key at current position */ + public ByteBuffer getKey(); + + /** @return value at current position */ + public ByteBuffer getValue(); + + /** @return key value at current position. */ + public ByteBuffer getKeyValueBuffer(); + + /** + * @return the KeyValue object at the current position. Includes memstore + * timestamp. + */ + public KeyValue getKeyValue(); + + /** Set position to beginning of given block */ + public void rewind(); + + /** + * Move to next position + * @return true on success, false if there is no more positions. + */ + public boolean next(); + + /** + * Move position to the same key (or one before it). + * @param key Array where is the key. + * @param offset Key position in array. + * @param length Key length in array. + * @param seekBefore find the key before in case of exact match. Does not + * matter in case of an inexact match. + * @return 0 on exact match, 1 on inexact match. 
+ */ + public int blockSeekTo(byte[] key, int offset, int length, + boolean seekBefore); + } +} \ No newline at end of file diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java new file mode 100644 index 0000000..963b5da --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Provide access to all data block encoding algorithms. All of the algorithms + * are required to have unique id which should NEVER be changed. If you + * want to add a new algorithm/version, assign it a new id. Announce the new id + * in the HBase mailing list to prevent collisions. + */ +public enum DataBlockEncoding { + + /** Disable data block encoding. */ + NONE(0, null), + // id 1 is reserved for the BITSET algorithm to be added later + PREFIX(2, new PrefixKeyDeltaEncoder()), + DIFF(3, new DiffKeyDeltaEncoder()), + FAST_DIFF(4, new FastDiffDeltaEncoder()); + + private final short id; + private final byte[] idInBytes; + private final DataBlockEncoder encoder; + + public static final int ID_SIZE = Bytes.SIZEOF_SHORT; + + /** Maps data block encoding ids to enum instances. */ + private static Map idToEncoding = + new HashMap(); + + static { + for (DataBlockEncoding algo : values()) { + if (idToEncoding.containsKey(algo.id)) { + throw new RuntimeException(String.format( + "Two data block encoder algorithms '%s' and '%s' have " + + "the same id %d", + idToEncoding.get(algo.id).toString(), algo.toString(), + (int) algo.id)); + } + idToEncoding.put(algo.id, algo); + } + } + + private DataBlockEncoding(int id, DataBlockEncoder encoder) { + if (id < Short.MIN_VALUE || id > Short.MAX_VALUE) { + throw new AssertionError( + "Data block encoding algorithm id is out of range: " + id); + } + this.id = (short) id; + this.idInBytes = Bytes.toBytes(this.id); + if (idInBytes.length != ID_SIZE) { + // White this may seem redundant, if we accidentally serialize + // the id as e.g. an int instead of a short, all encoders will break. + throw new RuntimeException("Unexpected length of encoder ID byte " + + "representation: " + Bytes.toStringBinary(idInBytes)); + } + this.encoder = encoder; + } + + /** + * @return name converted to bytes. + */ + public byte[] getNameInBytes() { + return Bytes.toBytes(toString()); + } + + /** + * @return The id of a data block encoder. 
+ */ + public short getId() { + return id; + } + + /** + * Writes id in bytes. + * @param stream where the id should be written. + */ + public void writeIdInBytes(OutputStream stream) throws IOException { + stream.write(idInBytes); + } + + /** + * Return new data block encoder for given algorithm type. + * @return data block encoder if algorithm is specified, null if none is + * selected. + */ + public DataBlockEncoder getEncoder() { + return encoder; + } + + /** + * Provide access to all data block encoders, even those which are not + * exposed in the enum. Useful for testing and benchmarking. + * @return list of all data block encoders. + */ + public static List getAllEncoders() { + ArrayList encoders = new ArrayList(); + for (DataBlockEncoding algo : values()) { + DataBlockEncoder encoder = algo.getEncoder(); + if (encoder != null) { + encoders.add(encoder); + } + } + + // Add encoders that are only used in testing. + encoders.add(new CopyKeyDataBlockEncoder()); + return encoders; + } + + /** + * Find and create data block encoder for given id; + * @param encoderId id of data block encoder. + * @return Newly created data block encoder. + */ + public static DataBlockEncoder getDataBlockEncoderById(short encoderId) { + if (!idToEncoding.containsKey(encoderId)) { + throw new IllegalArgumentException(String.format( + "There is no data block encoder for given id '%d'", + (int) encoderId)); + } + + return idToEncoding.get(encoderId).getEncoder(); + } + + /** + * Find and return the name of data block encoder for the given id. + * @param encoderId id of data block encoder + * @return name, same as used in options in column family + */ + public static String getNameFromId(short encoderId) { + return idToEncoding.get(encoderId).toString(); + } + + /** + * Check if given encoder has this id. + * @param encoder encoder which id will be checked + * @param encoderId id which we except + * @return true if id is right for given encoder, false otherwise + * @exception IllegalArgumentException + * thrown when there is no matching data block encoder + */ + public static boolean isCorrectEncoder(DataBlockEncoder encoder, + short encoderId) { + if (!idToEncoding.containsKey(encoderId)) { + throw new IllegalArgumentException(String.format( + "There is no data block encoder for given id '%d'", + (int) encoderId)); + } + + DataBlockEncoding algorithm = idToEncoding.get(encoderId); + return algorithm.getClass().equals(encoder.getClass()); + } + + public static DataBlockEncoding getEncodingById(short dataBlockEncodingId) { + return idToEncoding.get(dataBlockEncodingId); + } + +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java new file mode 100644 index 0000000..2461721 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java @@ -0,0 +1,548 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; + +/** + * Compress using: + * - store size of common prefix + * - save column family once, it is same within HFile + * - use integer compression for key, value and prefix (7-bit encoding) + * - use bits to avoid duplication key length, value length + * and type if it same as previous + * - store in 3 bits length of timestamp field + * - allow diff in timestamp instead of actual value + * + * Format: + * - 1 byte: flag + * - 1-5 bytes: key length (only if FLAG_SAME_KEY_LENGTH is not set in flag) + * - 1-5 bytes: value length (only if FLAG_SAME_VALUE_LENGTH is not set in flag) + * - 1-5 bytes: prefix length + * - ... bytes: rest of the row (if prefix length is small enough) + * - ... bytes: qualifier (or suffix depending on prefix length) + * - 1-8 bytes: timestamp or diff + * - 1 byte: type (only if FLAG_SAME_TYPE is not set in the flag) + * - ... bytes: value + */ +public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { + static final int FLAG_SAME_KEY_LENGTH = 1; + static final int FLAG_SAME_VALUE_LENGTH = 1 << 1; + static final int FLAG_SAME_TYPE = 1 << 2; + static final int FLAG_TIMESTAMP_IS_DIFF = 1 << 3; + static final int MASK_TIMESTAMP_LENGTH = (1 << 4) | (1 << 5) | (1 << 6); + static final int SHIFT_TIMESTAMP_LENGTH = 4; + static final int FLAG_TIMESTAMP_SIGN = 1 << 7; + + protected static class DiffCompressionState extends CompressionState { + long timestamp; + byte[] familyNameWithSize; + + @Override + protected void readTimestamp(ByteBuffer in) { + timestamp = in.getLong(); + } + + @Override + void copyFrom(CompressionState state) { + super.copyFrom(state); + DiffCompressionState state2 = (DiffCompressionState) state; + timestamp = state2.timestamp; + } + } + + private void compressSingleKeyValue(DiffCompressionState previousState, + DiffCompressionState currentState, DataOutputStream out, + ByteBuffer in) throws IOException { + byte flag = 0; + int kvPos = in.position(); + int keyLength = in.getInt(); + int valueLength = in.getInt(); + + long timestamp; + long diffTimestamp = 0; + int diffTimestampFitsInBytes = 0; + + int commonPrefix; + + int timestampFitsInBytes; + + if (previousState.isFirst()) { + currentState.readKey(in, keyLength, valueLength); + currentState.prevOffset = kvPos; + timestamp = currentState.timestamp; + if (timestamp < 0) { + flag |= FLAG_TIMESTAMP_SIGN; + timestamp = -timestamp; + } + timestampFitsInBytes = ByteBufferUtils.longFitsIn(timestamp); + + flag |= (timestampFitsInBytes - 1) << SHIFT_TIMESTAMP_LENGTH; + commonPrefix = 0; + + // put column family + in.mark(); + ByteBufferUtils.skip(in, currentState.rowLength + + KeyValue.ROW_LENGTH_SIZE); + ByteBufferUtils.copyToStream(out, in, currentState.familyLength + + KeyValue.FAMILY_LENGTH_SIZE); + in.reset(); + } else { 
+ // find a common prefix and skip it + commonPrefix = + ByteBufferUtils.findCommonPrefix(in, in.position(), + previousState.prevOffset + KeyValue.ROW_OFFSET, keyLength + - KeyValue.TIMESTAMP_TYPE_SIZE); + // don't compress timestamp and type using prefix + + currentState.readKey(in, keyLength, valueLength, + commonPrefix, previousState); + currentState.prevOffset = kvPos; + timestamp = currentState.timestamp; + boolean minusTimestamp = timestamp < 0; + if (minusTimestamp) { + timestamp = -timestamp; + } + timestampFitsInBytes = ByteBufferUtils.longFitsIn(timestamp); + + if (keyLength == previousState.keyLength) { + flag |= FLAG_SAME_KEY_LENGTH; + } + if (valueLength == previousState.valueLength) { + flag |= FLAG_SAME_VALUE_LENGTH; + } + if (currentState.type == previousState.type) { + flag |= FLAG_SAME_TYPE; + } + + // encode timestamp + diffTimestamp = previousState.timestamp - currentState.timestamp; + boolean minusDiffTimestamp = diffTimestamp < 0; + if (minusDiffTimestamp) { + diffTimestamp = -diffTimestamp; + } + diffTimestampFitsInBytes = ByteBufferUtils.longFitsIn(diffTimestamp); + if (diffTimestampFitsInBytes < timestampFitsInBytes) { + flag |= (diffTimestampFitsInBytes - 1) << SHIFT_TIMESTAMP_LENGTH; + flag |= FLAG_TIMESTAMP_IS_DIFF; + if (minusDiffTimestamp) { + flag |= FLAG_TIMESTAMP_SIGN; + } + } else { + flag |= (timestampFitsInBytes - 1) << SHIFT_TIMESTAMP_LENGTH; + if (minusTimestamp) { + flag |= FLAG_TIMESTAMP_SIGN; + } + } + } + + ByteBufferUtils.copyToStream(out, flag); + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + ByteBufferUtils.putCompressedInt(out, keyLength); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + ByteBufferUtils.putCompressedInt(out, valueLength); + } + + ByteBufferUtils.putCompressedInt(out, commonPrefix); + ByteBufferUtils.skip(in, commonPrefix); + + if (previousState.isFirst() || + commonPrefix < currentState.rowLength + KeyValue.ROW_LENGTH_SIZE) { + int restRowLength = + currentState.rowLength + KeyValue.ROW_LENGTH_SIZE - commonPrefix; + ByteBufferUtils.copyToStream(out, in, restRowLength); + ByteBufferUtils.skip(in, currentState.familyLength + + KeyValue.FAMILY_LENGTH_SIZE); + ByteBufferUtils.copyToStream(out, in, currentState.qualifierLength); + } else { + ByteBufferUtils.copyToStream(out, in, + keyLength - commonPrefix - KeyValue.TIMESTAMP_TYPE_SIZE); + } + + if ((flag & FLAG_TIMESTAMP_IS_DIFF) == 0) { + ByteBufferUtils.putLong(out, timestamp, timestampFitsInBytes); + } else { + ByteBufferUtils.putLong(out, diffTimestamp, diffTimestampFitsInBytes); + } + + if ((flag & FLAG_SAME_TYPE) == 0) { + ByteBufferUtils.copyToStream(out, currentState.type); + } + ByteBufferUtils.skip(in, KeyValue.TIMESTAMP_TYPE_SIZE); + + ByteBufferUtils.copyToStream(out, in, valueLength); + } + + private void uncompressSingleKeyValue(DataInputStream source, + ByteBuffer buffer, + DiffCompressionState state) + throws IOException, EncoderBufferTooSmallException { + // read the column family at the beginning + if (state.isFirst()) { + state.familyLength = source.readByte(); + state.familyNameWithSize = + new byte[(state.familyLength & 0xff) + KeyValue.FAMILY_LENGTH_SIZE]; + state.familyNameWithSize[0] = state.familyLength; + source.read(state.familyNameWithSize, KeyValue.FAMILY_LENGTH_SIZE, + state.familyLength); + } + + // read flag + byte flag = source.readByte(); + + // read key/value/common lengths + int keyLength; + int valueLength; + if ((flag & FLAG_SAME_KEY_LENGTH) != 0) { + keyLength = state.keyLength; + } else { + keyLength = 
ByteBufferUtils.readCompressedInt(source); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) != 0) { + valueLength = state.valueLength; + } else { + valueLength = ByteBufferUtils.readCompressedInt(source); + } + int commonPrefix = ByteBufferUtils.readCompressedInt(source); + + // create KeyValue buffer and fill it prefix + int keyOffset = buffer.position(); + ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength + + KeyValue.ROW_OFFSET); + buffer.putInt(keyLength); + buffer.putInt(valueLength); + + // copy common from previous key + if (commonPrefix > 0) { + ByteBufferUtils.copyFromBuffer(buffer, buffer, state.prevOffset + + KeyValue.ROW_OFFSET, commonPrefix); + } + + // copy the rest of the key from the buffer + int keyRestLength; + if (state.isFirst() || commonPrefix < + state.rowLength + KeyValue.ROW_LENGTH_SIZE) { + // omit the family part of the key, it is always the same + short rowLength; + int rowRestLength; + + // check length of row + if (commonPrefix < KeyValue.ROW_LENGTH_SIZE) { + // not yet copied, do it now + ByteBufferUtils.copyFromStream(source, buffer, + KeyValue.ROW_LENGTH_SIZE - commonPrefix); + ByteBufferUtils.skip(buffer, -KeyValue.ROW_LENGTH_SIZE); + rowLength = buffer.getShort(); + rowRestLength = rowLength; + } else { + // already in buffer, just read it + rowLength = buffer.getShort(keyOffset + KeyValue.ROW_OFFSET); + rowRestLength = rowLength + KeyValue.ROW_LENGTH_SIZE - commonPrefix; + } + + // copy the rest of row + ByteBufferUtils.copyFromStream(source, buffer, rowRestLength); + state.rowLength = rowLength; + + // copy the column family + buffer.put(state.familyNameWithSize); + + keyRestLength = keyLength - rowLength - + state.familyNameWithSize.length - + (KeyValue.ROW_LENGTH_SIZE + KeyValue.TIMESTAMP_TYPE_SIZE); + } else { + // prevRowWithSizeLength is the same as on previous row + keyRestLength = keyLength - commonPrefix - KeyValue.TIMESTAMP_TYPE_SIZE; + } + // copy the rest of the key, after column family -> column qualifier + ByteBufferUtils.copyFromStream(source, buffer, keyRestLength); + + // handle timestamp + int timestampFitsInBytes = + ((flag & MASK_TIMESTAMP_LENGTH) >>> SHIFT_TIMESTAMP_LENGTH) + 1; + long timestamp = ByteBufferUtils.readLong(source, timestampFitsInBytes); + if ((flag & FLAG_TIMESTAMP_SIGN) != 0) { + timestamp = -timestamp; + } + if ((flag & FLAG_TIMESTAMP_IS_DIFF) != 0) { + timestamp = state.timestamp - timestamp; + } + buffer.putLong(timestamp); + + // copy the type field + byte type; + if ((flag & FLAG_SAME_TYPE) != 0) { + type = state.type; + } else { + type = source.readByte(); + } + buffer.put(type); + + // copy value part + ByteBufferUtils.copyFromStream(source, buffer, valueLength); + + state.keyLength = keyLength; + state.valueLength = valueLength; + state.prevOffset = keyOffset; + state.timestamp = timestamp; + state.type = type; + // state.qualifier is unused + } + + @Override + public void compressKeyValues(DataOutputStream out, + ByteBuffer in, boolean includesMemstoreTS) throws IOException { + in.rewind(); + ByteBufferUtils.putInt(out, in.limit()); + DiffCompressionState previousState = new DiffCompressionState(); + DiffCompressionState currentState = new DiffCompressionState(); + while (in.hasRemaining()) { + compressSingleKeyValue(previousState, currentState, + out, in); + afterEncodingKeyValue(in, out, includesMemstoreTS); + + // swap previousState <-> currentState + DiffCompressionState tmp = previousState; + previousState = currentState; + currentState = tmp; + } + } + + @Override + public ByteBuffer 
uncompressKeyValues(DataInputStream source, + int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) + throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + allocHeaderLength); + buffer.position(allocHeaderLength); + DiffCompressionState state = new DiffCompressionState(); + while (source.available() > skipLastBytes) { + uncompressSingleKeyValue(source, buffer, state); + afterDecodingKeyValue(source, buffer, includesMemstoreTS); + } + + if (source.available() != skipLastBytes) { + throw new IllegalStateException("Read too much bytes."); + } + + return buffer; + } + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + block.mark(); + block.position(Bytes.SIZEOF_INT); + byte familyLength = block.get(); + ByteBufferUtils.skip(block, familyLength); + byte flag = block.get(); + int keyLength = ByteBufferUtils.readCompressedInt(block); + ByteBufferUtils.readCompressedInt(block); // valueLength + ByteBufferUtils.readCompressedInt(block); // commonLength + ByteBuffer result = ByteBuffer.allocate(keyLength); + + // copy row + int pos = result.arrayOffset(); + block.get(result.array(), pos, Bytes.SIZEOF_SHORT); + pos += Bytes.SIZEOF_SHORT; + short rowLength = result.getShort(); + block.get(result.array(), pos, rowLength); + pos += rowLength; + + // copy family + int savePosition = block.position(); + block.position(Bytes.SIZEOF_INT); + block.get(result.array(), pos, familyLength + Bytes.SIZEOF_BYTE); + pos += familyLength + Bytes.SIZEOF_BYTE; + + // copy qualifier + block.position(savePosition); + int qualifierLength = + keyLength - pos + result.arrayOffset() - KeyValue.TIMESTAMP_TYPE_SIZE; + block.get(result.array(), pos, qualifierLength); + pos += qualifierLength; + + // copy the timestamp and type + int timestampFitInBytes = + ((flag & MASK_TIMESTAMP_LENGTH) >>> SHIFT_TIMESTAMP_LENGTH) + 1; + long timestamp = ByteBufferUtils.readLong(block, timestampFitInBytes); + if ((flag & FLAG_TIMESTAMP_SIGN) != 0) { + timestamp = -timestamp; + } + result.putLong(pos, timestamp); + pos += Bytes.SIZEOF_LONG; + block.get(result.array(), pos, Bytes.SIZEOF_BYTE); + + block.reset(); + return result; + } + + @Override + public String toString() { + return DiffKeyDeltaEncoder.class.getSimpleName(); + } + + protected static class DiffSeekerState extends SeekerState { + private int rowLengthWithSize; + private long timestamp; + + @Override + protected void copyFrom(SeekerState that) { + super.copyFrom(that); + DiffSeekerState other = (DiffSeekerState) that; + rowLengthWithSize = other.rowLengthWithSize; + timestamp = other.timestamp; + } + } + + @Override + public EncodedSeeker createSeeker(RawComparator comparator, + final boolean includesMemstoreTS) { + return new BufferedEncodedSeeker(comparator) { + private byte[] familyNameWithSize; + private static final int TIMESTAMP_WITH_TYPE_LENGTH = + Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE; + + private void decode(boolean isFirst) { + byte flag = currentBuffer.get(); + byte type = 0; + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + if (!isFirst) { + type = current.keyBuffer[current.keyLength - Bytes.SIZEOF_BYTE]; + } + current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + current.valueLength = + ByteBufferUtils.readCompressedInt(currentBuffer); + } + current.lastCommonPrefix = + ByteBufferUtils.readCompressedInt(currentBuffer); + + current.ensureSpaceForKey(); + + if (current.lastCommonPrefix < 
Bytes.SIZEOF_SHORT) { + // length of row is different, copy everything except family + + // copy the row size + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + Bytes.SIZEOF_SHORT - current.lastCommonPrefix); + current.rowLengthWithSize = Bytes.toShort(current.keyBuffer, 0) + + Bytes.SIZEOF_SHORT; + + // copy the rest of row + currentBuffer.get(current.keyBuffer, Bytes.SIZEOF_SHORT, + current.rowLengthWithSize - Bytes.SIZEOF_SHORT); + + // copy the column family + System.arraycopy(familyNameWithSize, 0, current.keyBuffer, + current.rowLengthWithSize, familyNameWithSize.length); + + // copy the qualifier + currentBuffer.get(current.keyBuffer, + current.rowLengthWithSize + familyNameWithSize.length, + current.keyLength - current.rowLengthWithSize - + familyNameWithSize.length - TIMESTAMP_WITH_TYPE_LENGTH); + } else if (current.lastCommonPrefix < current.rowLengthWithSize) { + // we have to copy part of row and qualifier, + // but column family is in right place + + // before column family (rest of row) + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.rowLengthWithSize - current.lastCommonPrefix); + + // after column family (qualifier) + currentBuffer.get(current.keyBuffer, + current.rowLengthWithSize + familyNameWithSize.length, + current.keyLength - current.rowLengthWithSize - + familyNameWithSize.length - TIMESTAMP_WITH_TYPE_LENGTH); + } else { + // copy just the ending + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.keyLength - TIMESTAMP_WITH_TYPE_LENGTH - + current.lastCommonPrefix); + } + + // timestamp + int pos = current.keyLength - TIMESTAMP_WITH_TYPE_LENGTH; + int timestampFitInBytes = 1 + + ((flag & MASK_TIMESTAMP_LENGTH) >>> SHIFT_TIMESTAMP_LENGTH); + long timestampOrDiff = + ByteBufferUtils.readLong(currentBuffer, timestampFitInBytes); + if ((flag & FLAG_TIMESTAMP_SIGN) != 0) { + timestampOrDiff = -timestampOrDiff; + } + if ((flag & FLAG_TIMESTAMP_IS_DIFF) == 0) { // it is timestamp + current.timestamp = timestampOrDiff; + } else { // it is diff + current.timestamp = current.timestamp - timestampOrDiff; + } + Bytes.putLong(current.keyBuffer, pos, current.timestamp); + pos += Bytes.SIZEOF_LONG; + + // type + if ((flag & FLAG_SAME_TYPE) == 0) { + currentBuffer.get(current.keyBuffer, pos, Bytes.SIZEOF_BYTE); + } else if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + current.keyBuffer[pos] = type; + } + + current.valueOffset = currentBuffer.position(); + ByteBufferUtils.skip(currentBuffer, current.valueLength); + + if (includesMemstoreTS) { + current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); + } else { + current.memstoreTS = 0; + } + current.nextKvOffset = currentBuffer.position(); + } + + @Override + protected void decodeFirst() { + ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT); + + // read column family + byte familyNameLength = currentBuffer.get(); + familyNameWithSize = new byte[familyNameLength + Bytes.SIZEOF_BYTE]; + familyNameWithSize[0] = familyNameLength; + currentBuffer.get(familyNameWithSize, Bytes.SIZEOF_BYTE, + familyNameLength); + decode(true); + } + + @Override + protected void decodeNext() { + decode(false); + } + + @Override + protected DiffSeekerState createSeekerState() { + return new DiffSeekerState(); + } + }; + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java new file mode 100644 index 0000000..279a393 --- /dev/null +++ 
src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Iterator; + +import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.io.compress.Compressor; + +/** + * Encapsulates a data block compressed using a particular encoding algorithm. + * Useful for testing and benchmarking. + */ +public class EncodedDataBlock { + private static final int BUFFER_SIZE = 4 * 1024; + protected DataBlockEncoder dataBlockEncoder; + ByteArrayOutputStream uncompressedOutputStream; + ByteBuffer uncompressedBuffer; + private byte[] cacheCompressData; + private ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + private boolean includesMemstoreTS; + + /** + * Create a buffer which will be encoded using dataBlockEncoder. + * @param dataBlockEncoder Algorithm used for compression. + * @param includesMemstoreTS Whether added KeyValues carry a memstore timestamp. + */ + public EncodedDataBlock(DataBlockEncoder dataBlockEncoder, + boolean includesMemstoreTS) { + this.dataBlockEncoder = dataBlockEncoder; + this.includesMemstoreTS = includesMemstoreTS; + uncompressedOutputStream = new ByteArrayOutputStream(BUFFER_SIZE); + } + + /** + * Add a KeyValue to the encoding buffer and invalidate any cached encoding. + * @param kv Item to be added. + */ + public void addKv(KeyValue kv) { + cacheCompressData = null; + uncompressedOutputStream.write( + kv.getBuffer(), kv.getOffset(), kv.getLength()); + } + + /** + * Provides access to the encoded KeyValues. + * @return Forward sequential iterator. 
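+ * The whole block is decoded lazily, on the first call to next() on the returned iterator.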
+ */ + public Iterator getIterator() { + final int uncompressedSize = uncompressedOutputStream.size(); + final ByteArrayInputStream bais = new ByteArrayInputStream( + getCompressedData()); + final DataInputStream dis = new DataInputStream(bais); + + + return new Iterator() { + private ByteBuffer decompressedData = null; + + @Override + public boolean hasNext() { + if (decompressedData == null) { + return uncompressedSize > 0; + } + return decompressedData.hasRemaining(); + } + + @Override + public KeyValue next() { + if (decompressedData == null) { + try { + decompressedData = dataBlockEncoder.uncompressKeyValues( + dis, includesMemstoreTS); + } catch (IOException e) { + throw new RuntimeException("Problem with data block encoder, " + + "most likely it requested more bytes than are available.", e); + } + decompressedData.rewind(); + } + + int offset = decompressedData.position(); + KeyValue kv = new KeyValue(decompressedData.array(), offset); + decompressedData.position(offset + kv.getLength()); + + return kv; + } + + @Override + public void remove() { + throw new NotImplementedException("remove() is not supported!"); + } + + @Override + public String toString() { + return "Iterator of: " + dataBlockEncoder.getClass().getName(); + } + + }; + } + + /** + * Find the size of the minimal buffer that could store the compressed data. + * @return Size in bytes of compressed data. + */ + public int getSize() { + return getCompressedData().length; + } + + /** + * Find the size of the compressed data, assuming the given buffer is + * compressed using the given algorithm. + * @param compressor Algorithm used for compression. + * @param buffer Array to be compressed. + * @param offset Offset to the beginning of the data. + * @param length Length to be compressed. + * @return Size of compressed data in bytes. + */ + public static int checkCompressedSize(Compressor compressor, byte[] buffer, + int offset, int length) { + byte[] compressedBuffer = new byte[buffer.length]; + // in fact the buffer could be of any positive size + compressor.setInput(buffer, offset, length); + compressor.finish(); + int currentPos = 0; + while (!compressor.finished()) { + try { + // we do not care about the compressed data itself, + // we just want to calculate the number of bytes + currentPos += compressor.compress(compressedBuffer, 0, + compressedBuffer.length); + } catch (IOException e) { + throw new RuntimeException( + "For some reason compressor couldn't read data. " + + "It is likely a problem with " + + compressor.getClass().getName(), e); + } + } + return currentPos; + } + + /** + * Estimate the size after a second stage of compression (e.g. LZO). + * @param compressor Algorithm which will be used for compression. + * @return Size after second stage of compression. + */ + public int checkCompressedSize(Compressor compressor) { + // compress + byte[] compressedBytes = getCompressedData(); + return checkCompressedSize(compressor, compressedBytes, 0, + compressedBytes.length); + } + + private byte[] getCompressedData() { + // use the cached encoding if we have one + if (cacheCompressData != null) { + return cacheCompressData; + } + cacheCompressData = doCompressData(); + + return cacheCompressData; + } + + private ByteBuffer getUncompressedBuffer() { + if (uncompressedBuffer == null || + uncompressedBuffer.limit() < uncompressedOutputStream.size()) { + uncompressedBuffer = ByteBuffer.wrap( + uncompressedOutputStream.toByteArray()); + } + return uncompressedBuffer; + } + + /** + * Do the encoding. + * @return Encoded byte array. 
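+ * Note: only the data block encoder is applied here; a second-stage
+ * compression codec (e.g. LZO) is applied separately and can be estimated
+ * with checkCompressedSize(Compressor).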
+ */ + public byte[] doCompressData() { + compressedStream.reset(); + DataOutputStream dataOut = new DataOutputStream(compressedStream); + try { + this.dataBlockEncoder.compressKeyValues( + dataOut, getUncompressedBuffer(), includesMemstoreTS); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Bug in encoding part of algorithm %s. " + + "Probably it requested more bytes than are available.", + toString()), e); + } + return compressedStream.toByteArray(); + } + + @Override + public String toString() { + return dataBlockEncoder.toString(); + } + + /** + * Get the uncompressed (unencoded) KeyValue buffer. + * @return The raw KeyValue bytes. + */ + public byte[] getRawKeyValues() { + return uncompressedOutputStream.toByteArray(); + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/EncoderBufferTooSmallException.java src/main/java/org/apache/hadoop/hbase/io/encoding/EncoderBufferTooSmallException.java new file mode 100644 index 0000000..55195c1 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/EncoderBufferTooSmallException.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +/** + * Internal error which indicates a bug in a data block encoding algorithm. + */ +public class EncoderBufferTooSmallException extends RuntimeException { + private static final long serialVersionUID = 4767495176134878737L; + + public EncoderBufferTooSmallException(String message) { + super(message); + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java new file mode 100644 index 0000000..1363a12 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java @@ -0,0 +1,522 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; + +/** + * Encoder similar to {@link DiffKeyDeltaEncoder} but supposedly faster. + * + * Compress using: + * - store the size of the common prefix + * - save the column family once, in the first KeyValue + * - use integer compression for the key, value and prefix lengths (7-bit encoding) + * - use flag bits to avoid duplicating the key length, value length + * and type when they are the same as in the previous KeyValue + * - store, in 3 bits, the length of the timestamp prefix shared + * with the previous KeyValue's timestamp + * - use one bit to omit the value if it is the same as in the previous KeyValue + * + * Format: + * - 1 byte: flag + * - 1-5 bytes: key length (only if FLAG_SAME_KEY_LENGTH is not set in flag) + * - 1-5 bytes: value length (only if FLAG_SAME_VALUE_LENGTH is not set in flag) + * - 1-5 bytes: prefix length + * - ... bytes: rest of the row (if prefix length is small enough) + * - ... bytes: qualifier (or suffix depending on prefix length) + * - 1-8 bytes: timestamp suffix + * - 1 byte: type (only if FLAG_SAME_TYPE is not set in the flag) + * - ... bytes: value (only if FLAG_SAME_VALUE is not set in the flag) + * + */ +public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { + final int MASK_TIMESTAMP_LENGTH = (1 << 0) | (1 << 1) | (1 << 2); + final int SHIFT_TIMESTAMP_LENGTH = 0; + final int FLAG_SAME_KEY_LENGTH = 1 << 3; + final int FLAG_SAME_VALUE_LENGTH = 1 << 4; + final int FLAG_SAME_TYPE = 1 << 5; + final int FLAG_SAME_VALUE = 1 << 6; + + private static class FastDiffCompressionState extends CompressionState { + byte[] timestamp = new byte[KeyValue.TIMESTAMP_SIZE]; + int prevTimestampOffset; + + @Override + protected void readTimestamp(ByteBuffer in) { + in.get(timestamp); + } + + @Override + void copyFrom(CompressionState state) { + super.copyFrom(state); + FastDiffCompressionState state2 = (FastDiffCompressionState) state; + System.arraycopy(state2.timestamp, 0, timestamp, 0, + KeyValue.TIMESTAMP_SIZE); + prevTimestampOffset = state2.prevTimestampOffset; + } + } + + private void compressSingleKeyValue( + FastDiffCompressionState previousState, + FastDiffCompressionState currentState, + OutputStream out, ByteBuffer in) throws IOException { + currentState.prevOffset = in.position(); + int keyLength = in.getInt(); + int valueOffset = currentState.prevOffset + keyLength + KeyValue.ROW_OFFSET; + int valueLength = in.getInt(); + byte flag = 0; + + if (previousState.isFirst()) { + // copy the whole key; there is no previous key to share a prefix with + ByteBufferUtils.copyToStream(out, flag); + ByteBufferUtils.putCompressedInt(out, keyLength); + ByteBufferUtils.putCompressedInt(out, valueLength); + ByteBufferUtils.putCompressedInt(out, 0); + + currentState.readKey(in, keyLength, valueLength); + + ByteBufferUtils.copyToStream(out, in, keyLength + valueLength); + } else { + // find a common prefix and skip it + int commonPrefix = ByteBufferUtils.findCommonPrefix(in, in.position(), + previousState.prevOffset + KeyValue.ROW_OFFSET, + keyLength - KeyValue.TIMESTAMP_TYPE_SIZE); + + currentState.readKey(in, keyLength, valueLength, + commonPrefix, previousState); + + if (keyLength == previousState.keyLength) { + flag |= FLAG_SAME_KEY_LENGTH; + } + if (valueLength == previousState.valueLength) { + 
flag |= FLAG_SAME_VALUE_LENGTH; + } + if (currentState.type == previousState.type) { + flag |= FLAG_SAME_TYPE; + } + + int prefixTimestamp = findCommonTimestampPrefix( + currentState, previousState); + flag |= (prefixTimestamp) << SHIFT_TIMESTAMP_LENGTH; + + if (ByteBufferUtils.arePartsEqual(in, previousState.prevOffset + + previousState.keyLength + KeyValue.ROW_OFFSET, + previousState.valueLength, valueOffset, valueLength)) { + flag |= FLAG_SAME_VALUE; + } + + ByteBufferUtils.copyToStream(out, flag); + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + ByteBufferUtils.putCompressedInt(out, keyLength); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + ByteBufferUtils.putCompressedInt(out, valueLength); + } + ByteBufferUtils.putCompressedInt(out, commonPrefix); + + ByteBufferUtils.skip(in, commonPrefix); + if (commonPrefix < currentState.rowLength + KeyValue.ROW_LENGTH_SIZE) { + ByteBufferUtils.copyToStream(out, in, + currentState.rowLength + KeyValue.ROW_LENGTH_SIZE - commonPrefix); + ByteBufferUtils.skip(in, currentState.familyLength + + KeyValue.FAMILY_LENGTH_SIZE); + ByteBufferUtils.copyToStream(out, in, currentState.qualifierLength); + } else { + int restKeyLength = keyLength - commonPrefix - + KeyValue.TIMESTAMP_TYPE_SIZE; + ByteBufferUtils.copyToStream(out, in, restKeyLength); + } + ByteBufferUtils.skip(in, prefixTimestamp); + ByteBufferUtils.copyToStream(out, in, + KeyValue.TIMESTAMP_SIZE - prefixTimestamp); + + if ((flag & FLAG_SAME_TYPE) == 0) { + valueOffset -= KeyValue.TYPE_SIZE; + valueLength += KeyValue.TYPE_SIZE; + } + + ByteBufferUtils.skip(in, KeyValue.TYPE_SIZE + currentState.valueLength); + + if ((flag & FLAG_SAME_VALUE) == 0 ) { + ByteBufferUtils.copyToStream(out, in, valueOffset, valueLength); + } else { + if ((flag & FLAG_SAME_TYPE) == 0) { + ByteBufferUtils.copyToStream(out, currentState.type); + } + } + } + } + + private int findCommonTimestampPrefix(FastDiffCompressionState left, + FastDiffCompressionState right) { + int prefixTimestamp = 0; + while (prefixTimestamp < (KeyValue.TIMESTAMP_SIZE - 1) && + left.timestamp[prefixTimestamp] + == right.timestamp[prefixTimestamp]) { + prefixTimestamp++; + } + return prefixTimestamp; // has to be at most 7 bytes + } + + private void uncompressSingleKeyValue(DataInputStream source, + ByteBuffer buffer, FastDiffCompressionState state) + throws IOException, EncoderBufferTooSmallException { + byte flag = source.readByte(); + int prevKeyLength = state.keyLength; + + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + state.keyLength = ByteBufferUtils.readCompressedInt(source); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + state.valueLength = ByteBufferUtils.readCompressedInt(source); + } + int commonLength = ByteBufferUtils.readCompressedInt(source); + + ByteBufferUtils.ensureSpace(buffer, state.keyLength + state.valueLength + + KeyValue.ROW_OFFSET); + + int kvPos = buffer.position(); + + if (!state.isFirst()) { + // copy the prefix + int common; + int prevOffset; + + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + buffer.putInt(state.keyLength); + buffer.putInt(state.valueLength); + prevOffset = state.prevOffset + KeyValue.ROW_OFFSET; + common = commonLength; + } else { + if ((flag & FLAG_SAME_KEY_LENGTH) != 0) { + prevOffset = state.prevOffset; + common = commonLength + KeyValue.ROW_OFFSET; + } else { + buffer.putInt(state.keyLength); + prevOffset = state.prevOffset + KeyValue.KEY_LENGTH_SIZE; + common = commonLength + KeyValue.KEY_LENGTH_SIZE; + } + } + + ByteBufferUtils.copyFromBuffer(buffer, buffer, prevOffset, common); + + // copy 
the rest of the key from the buffer + int keyRestLength; + if (commonLength < state.rowLength + KeyValue.ROW_LENGTH_SIZE) { + // omit the family part of the key, it is always the same + int rowWithSizeLength; + int rowRestLength; + + // check length of row + if (commonLength < KeyValue.ROW_LENGTH_SIZE) { + // not yet copied, do it now + ByteBufferUtils.copyFromStream(source, buffer, + KeyValue.ROW_LENGTH_SIZE - commonLength); + + rowWithSizeLength = buffer.getShort(buffer.position() - + KeyValue.ROW_LENGTH_SIZE) + KeyValue.ROW_LENGTH_SIZE; + rowRestLength = rowWithSizeLength - KeyValue.ROW_LENGTH_SIZE; + } else { + // already in kvBuffer, just read it + rowWithSizeLength = buffer.getShort(kvPos + KeyValue.ROW_OFFSET) + + KeyValue.ROW_LENGTH_SIZE; + rowRestLength = rowWithSizeLength - commonLength; + } + + // copy the rest of row + ByteBufferUtils.copyFromStream(source, buffer, rowRestLength); + + // copy the column family + ByteBufferUtils.copyFromBuffer(buffer, buffer, + state.prevOffset + KeyValue.ROW_OFFSET + KeyValue.ROW_LENGTH_SIZE + + state.rowLength, state.familyLength + + KeyValue.FAMILY_LENGTH_SIZE); + state.rowLength = (short) (rowWithSizeLength - + KeyValue.ROW_LENGTH_SIZE); + + keyRestLength = state.keyLength - rowWithSizeLength - + state.familyLength - + (KeyValue.FAMILY_LENGTH_SIZE + KeyValue.TIMESTAMP_TYPE_SIZE); + } else { + // prevRowWithSizeLength is the same as on previous row + keyRestLength = state.keyLength - commonLength - + KeyValue.TIMESTAMP_TYPE_SIZE; + } + // copy the rest of the key, after column family == column qualifier + ByteBufferUtils.copyFromStream(source, buffer, keyRestLength); + + // copy timestamp + int prefixTimestamp = + (flag & MASK_TIMESTAMP_LENGTH) >>> SHIFT_TIMESTAMP_LENGTH; + ByteBufferUtils.copyFromBuffer(buffer, buffer, state.prevTimestampOffset, + prefixTimestamp); + state.prevTimestampOffset = buffer.position() - prefixTimestamp; + ByteBufferUtils.copyFromStream(source, buffer, KeyValue.TIMESTAMP_SIZE + - prefixTimestamp); + + // copy the type and value + if ((flag & FLAG_SAME_TYPE) != 0) { + buffer.put(state.type); + if ((flag & FLAG_SAME_VALUE) != 0) { + ByteBufferUtils.copyFromBuffer(buffer, buffer, state.prevOffset + + KeyValue.ROW_OFFSET + prevKeyLength, state.valueLength); + } else { + ByteBufferUtils.copyFromStream(source, buffer, state.valueLength); + } + } else { + if ((flag & FLAG_SAME_VALUE) != 0) { + ByteBufferUtils.copyFromStream(source, buffer, KeyValue.TYPE_SIZE); + ByteBufferUtils.copyFromBuffer(buffer, buffer, state.prevOffset + + KeyValue.ROW_OFFSET + prevKeyLength, state.valueLength); + } else { + ByteBufferUtils.copyFromStream(source, buffer, + state.valueLength + KeyValue.TYPE_SIZE); + } + state.type = buffer.get(state.prevTimestampOffset + + KeyValue.TIMESTAMP_SIZE); + } + } else { // is first element + buffer.putInt(state.keyLength); + buffer.putInt(state.valueLength); + + state.prevTimestampOffset = buffer.position() + state.keyLength - + KeyValue.TIMESTAMP_TYPE_SIZE; + ByteBufferUtils.copyFromStream(source, buffer, state.keyLength + + state.valueLength); + state.rowLength = buffer.getShort(kvPos + KeyValue.ROW_OFFSET); + state.familyLength = buffer.get(kvPos + KeyValue.ROW_OFFSET + + KeyValue.ROW_LENGTH_SIZE + state.rowLength); + state.type = buffer.get(state.prevTimestampOffset + + KeyValue.TIMESTAMP_SIZE); + } + + state.prevOffset = kvPos; + } + + @Override + public void compressKeyValues(DataOutputStream out, + ByteBuffer in, boolean includesMemstoreTS) throws IOException { + in.rewind(); + 
ByteBufferUtils.putInt(out, in.limit()); + FastDiffCompressionState previousState = new FastDiffCompressionState(); + FastDiffCompressionState currentState = new FastDiffCompressionState(); + while (in.hasRemaining()) { + compressSingleKeyValue(previousState, currentState, + out, in); + afterEncodingKeyValue(in, out, includesMemstoreTS); + + // swap previousState <-> currentState + FastDiffCompressionState tmp = previousState; + previousState = currentState; + currentState = tmp; + } + } + + @Override + public ByteBuffer uncompressKeyValues(DataInputStream source, + int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) + throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + allocHeaderLength); + buffer.position(allocHeaderLength); + FastDiffCompressionState state = new FastDiffCompressionState(); + while (source.available() > skipLastBytes) { + uncompressSingleKeyValue(source, buffer, state); + afterDecodingKeyValue(source, buffer, includesMemstoreTS); + } + + if (source.available() != skipLastBytes) { + throw new IllegalStateException("Read too much bytes."); + } + + return buffer; + } + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + block.mark(); + block.position(Bytes.SIZEOF_INT + Bytes.SIZEOF_BYTE); + int keyLength = ByteBufferUtils.readCompressedInt(block); + ByteBufferUtils.readCompressedInt(block); // valueLength + ByteBufferUtils.readCompressedInt(block); // commonLength + int pos = block.position(); + block.reset(); + return ByteBuffer.wrap(block.array(), pos, keyLength).slice(); + } + + @Override + public String toString() { + return FastDiffDeltaEncoder.class.getSimpleName(); + } + + protected static class FastDiffSeekerState extends SeekerState { + private byte[] prevTimestampAndType = + new byte[KeyValue.TIMESTAMP_TYPE_SIZE]; + private int rowLengthWithSize; + private int familyLengthWithSize; + + @Override + protected void copyFrom(SeekerState that) { + super.copyFrom(that); + FastDiffSeekerState other = (FastDiffSeekerState) that; + System.arraycopy(other.prevTimestampAndType, 0, + prevTimestampAndType, 0, + KeyValue.TIMESTAMP_TYPE_SIZE); + rowLengthWithSize = other.rowLengthWithSize; + familyLengthWithSize = other.familyLengthWithSize; + } + } + + @Override + public EncodedSeeker createSeeker(RawComparator comparator, + final boolean includesMemstoreTS) { + return new BufferedEncodedSeeker(comparator) { + private void decode(boolean isFirst) { + byte flag = currentBuffer.get(); + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + if (!isFirst) { + System.arraycopy(current.keyBuffer, + current.keyLength - current.prevTimestampAndType.length, + current.prevTimestampAndType, 0, + current.prevTimestampAndType.length); + } + current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer); + } + if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) { + current.valueLength = + ByteBufferUtils.readCompressedInt(currentBuffer); + } + current.lastCommonPrefix = + ByteBufferUtils.readCompressedInt(currentBuffer); + + current.ensureSpaceForKey(); + + if (isFirst) { + // copy everything + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.keyLength - current.prevTimestampAndType.length); + current.rowLengthWithSize = Bytes.toShort(current.keyBuffer, 0) + + Bytes.SIZEOF_SHORT; + current.familyLengthWithSize = + current.keyBuffer[current.rowLengthWithSize] + Bytes.SIZEOF_BYTE; + } else if (current.lastCommonPrefix < Bytes.SIZEOF_SHORT) { + // length of row is 
different, copy everything except family + + // copy the row size + int oldRowLengthWithSize = current.rowLengthWithSize; + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + Bytes.SIZEOF_SHORT - current.lastCommonPrefix); + current.rowLengthWithSize = Bytes.toShort(current.keyBuffer, 0) + + Bytes.SIZEOF_SHORT; + + // move the column family + System.arraycopy(current.keyBuffer, oldRowLengthWithSize, + current.keyBuffer, current.rowLengthWithSize, + current.familyLengthWithSize); + + // copy the rest of row + currentBuffer.get(current.keyBuffer, Bytes.SIZEOF_SHORT, + current.rowLengthWithSize - Bytes.SIZEOF_SHORT); + + // copy the qualifier + currentBuffer.get(current.keyBuffer, current.rowLengthWithSize + + current.familyLengthWithSize, current.keyLength + - current.rowLengthWithSize - current.familyLengthWithSize + - current.prevTimestampAndType.length); + } else if (current.lastCommonPrefix < current.rowLengthWithSize) { + // We have to copy part of row and qualifier, but the column family + // is in the right place. + + // before column family (rest of row) + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.rowLengthWithSize - current.lastCommonPrefix); + + // after column family (qualifier) + currentBuffer.get(current.keyBuffer, current.rowLengthWithSize + + current.familyLengthWithSize, current.keyLength + - current.rowLengthWithSize - current.familyLengthWithSize + - current.prevTimestampAndType.length); + } else { + // copy just the ending + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.keyLength - current.prevTimestampAndType.length + - current.lastCommonPrefix); + } + + // timestamp + int pos = current.keyLength - current.prevTimestampAndType.length; + int commonTimestampPrefix = (flag & MASK_TIMESTAMP_LENGTH) >>> + SHIFT_TIMESTAMP_LENGTH; + if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + System.arraycopy(current.prevTimestampAndType, 0, current.keyBuffer, + pos, commonTimestampPrefix); + } + pos += commonTimestampPrefix; + currentBuffer.get(current.keyBuffer, pos, + Bytes.SIZEOF_LONG - commonTimestampPrefix); + pos += Bytes.SIZEOF_LONG - commonTimestampPrefix; + + // type + if ((flag & FLAG_SAME_TYPE) == 0) { + currentBuffer.get(current.keyBuffer, pos, Bytes.SIZEOF_BYTE); + } else if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { + current.keyBuffer[pos] = + current.prevTimestampAndType[Bytes.SIZEOF_LONG]; + } + + // handle value + if ((flag & FLAG_SAME_VALUE) == 0) { + current.valueOffset = currentBuffer.position(); + ByteBufferUtils.skip(currentBuffer, current.valueLength); + } + + if (includesMemstoreTS) { + current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); + } else { + current.memstoreTS = 0; + } + current.nextKvOffset = currentBuffer.position(); + } + + @Override + protected void decodeFirst() { + ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT); + decode(true); + } + + @Override + protected void decodeNext() { + decode(false); + } + + @Override + protected FastDiffSeekerState createSeekerState() { + return new FastDiffSeekerState(); + } + }; + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java new file mode 100644 index 0000000..51ef41a --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; + +/** + * Compress the key by storing the size of the common prefix with the previous + * KeyValue and storing the rest of the key unencoded. + * + * Format: + * 1-5 bytes: compressed key length minus prefix (7-bit encoding) + * 1-5 bytes: compressed value length (7-bit encoding) + * 1-3 bytes: compressed length of common key prefix + * ... bytes: rest of key (including timestamp) + * ... bytes: value + * + * In the worst case a compressed KeyValue will be three bytes longer than the original. + * + */ +public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { + + private int addKv(int offset, DataOutputStream out, + ByteBuffer in, int prevKeyLength) throws IOException { + int keyLength = in.getInt(); + int valueLength = in.getInt(); + + if (offset == -1) { + // copy the whole key; there is no previous key to share a prefix with + ByteBufferUtils.putCompressedInt(out, keyLength); + ByteBufferUtils.putCompressedInt(out, valueLength); + ByteBufferUtils.putCompressedInt(out, 0); + ByteBufferUtils.copyToStream(out, in, keyLength + valueLength); + } else { + // find a common prefix and skip it + int common = ByteBufferUtils.findCommonPrefix( + in, offset + KeyValue.ROW_OFFSET, + in.position(), + Math.min(prevKeyLength, keyLength)); + + ByteBufferUtils.putCompressedInt(out, keyLength - common); + ByteBufferUtils.putCompressedInt(out, valueLength); + ByteBufferUtils.putCompressedInt(out, common); + + ByteBufferUtils.skip(in, common); + ByteBufferUtils.copyToStream(out, in, keyLength - common + valueLength); + } + + return keyLength; + } + + @Override + public void compressKeyValues(DataOutputStream writeHere, + ByteBuffer in, boolean includesMemstoreTS) throws IOException { + in.rewind(); + ByteBufferUtils.putInt(writeHere, in.limit()); + int prevOffset = -1; + int offset = 0; + int keyLength = 0; + while (in.hasRemaining()) { + offset = in.position(); + keyLength = addKv(prevOffset, writeHere, in, keyLength); + afterEncodingKeyValue(in, writeHere, includesMemstoreTS); + prevOffset = offset; + } + } + + @Override + public ByteBuffer uncompressKeyValues(DataInputStream source, + int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) + throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + allocHeaderLength); + buffer.position(allocHeaderLength); + int prevKeyOffset = 0; + + while (source.available() > skipLastBytes) { + prevKeyOffset = uncompressKeyValue(source, buffer, prevKeyOffset); + 
afterDecodingKeyValue(source, buffer, includesMemstoreTS); + } + + if (source.available() != skipLastBytes) { + throw new IllegalStateException("Read too many bytes."); + } + + buffer.limit(buffer.position()); + return buffer; + } + + private int uncompressKeyValue(DataInputStream source, ByteBuffer buffer, + int prevKeyOffset) + throws IOException, EncoderBufferTooSmallException { + int keyLength = ByteBufferUtils.readCompressedInt(source); + int valueLength = ByteBufferUtils.readCompressedInt(source); + int commonLength = ByteBufferUtils.readCompressedInt(source); + int keyOffset; + keyLength += commonLength; + + ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength + + KeyValue.ROW_OFFSET); + + buffer.putInt(keyLength); + buffer.putInt(valueLength); + + // copy the prefix + if (commonLength > 0) { + keyOffset = buffer.position(); + ByteBufferUtils.copyFromBuffer(buffer, buffer, prevKeyOffset, + commonLength); + } else { + keyOffset = buffer.position(); + } + + // copy rest of the key and value + int len = keyLength - commonLength + valueLength; + ByteBufferUtils.copyFromStream(source, buffer, len); + return keyOffset; + } + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + block.mark(); + block.position(Bytes.SIZEOF_INT); + int keyLength = ByteBufferUtils.readCompressedInt(block); + ByteBufferUtils.readCompressedInt(block); + int commonLength = ByteBufferUtils.readCompressedInt(block); + if (commonLength != 0) { + throw new AssertionError("Nonzero common length in the first key in " + + "block: " + commonLength); + } + int pos = block.position(); + block.reset(); + return ByteBuffer.wrap(block.array(), pos, keyLength).slice(); + } + + @Override + public String toString() { + return PrefixKeyDeltaEncoder.class.getSimpleName(); + } + + @Override + public EncodedSeeker createSeeker(RawComparator comparator, + final boolean includesMemstoreTS) { + return new BufferedEncodedSeeker(comparator) { + @Override + protected void decodeNext() { + current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer); + current.valueLength = ByteBufferUtils.readCompressedInt(currentBuffer); + current.lastCommonPrefix = + ByteBufferUtils.readCompressedInt(currentBuffer); + current.keyLength += current.lastCommonPrefix; + current.ensureSpaceForKey(); + currentBuffer.get(current.keyBuffer, current.lastCommonPrefix, + current.keyLength - current.lastCommonPrefix); + current.valueOffset = currentBuffer.position(); + ByteBufferUtils.skip(currentBuffer, current.valueLength); + if (includesMemstoreTS) { + current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); + } else { + current.memstoreTS = 0; + } + current.nextKvOffset = currentBuffer.position(); + } + + @Override + protected void decodeFirst() { + ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT); + decodeNext(); + } + }; + } +} diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java index 9123e70..3d206d7 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java @@ -1,4 +1,4 @@ -/* + /* * Copyright 2011 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one @@ -26,8 +26,8 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; +import 
org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; -import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; import org.apache.hadoop.io.RawComparator; @@ -60,6 +60,13 @@ public abstract class AbstractHFileReader extends SchemaConfigured /** Filled when we read in the trailer. */ protected final Compression.Algorithm compressAlgo; + /** + * What kind of data block encoding should be used while reading, writing, + * and handling cache. + */ + protected HFileDataBlockEncoder dataBlockEncoder = + NoOpDataBlockEncoder.INSTANCE; + /** Last key in the file. Filled in when we read in the file info */ protected byte [] lastKey = null; @@ -275,8 +282,11 @@ public abstract class AbstractHFileReader extends SchemaConfigured protected int blockFetches; - public Scanner(final boolean cacheBlocks, + protected final HFile.Reader reader; + + public Scanner(final HFile.Reader reader, final boolean cacheBlocks, final boolean pread, final boolean isCompaction) { + this.reader = reader; this.cacheBlocks = cacheBlocks; this.pread = pread; this.isCompaction = isCompaction; @@ -296,6 +306,26 @@ public abstract class AbstractHFileReader extends SchemaConfigured if (!isSeeked()) throw new NotSeekedException(); } + + @Override + public int seekTo(byte[] key) throws IOException { + return seekTo(key, 0, key.length); + } + + @Override + public boolean seekBefore(byte[] key) throws IOException { + return seekBefore(key, 0, key.length); + } + + @Override + public int reseekTo(byte[] key) throws IOException { + return reseekTo(key, 0, key.length); + } + + @Override + public HFile.Reader getReader() { + return reader; + } } /** For testing */ @@ -307,4 +337,9 @@ public abstract class AbstractHFileReader extends SchemaConfigured return path; } + @Override + public DataBlockEncoding getEncodingOnDisk() { + return dataBlockEncoder.getEncodingOnDisk(); + } + } diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java index 4c74738..52f063f 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java @@ -83,6 +83,12 @@ public abstract class AbstractHFileWriter extends SchemaConfigured /** The compression algorithm used. NONE if no compression. */ protected final Compression.Algorithm compressAlgo; + + /** + * The data block encoding which will be used. + * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding. + */ + protected final HFileDataBlockEncoder blockEncoder; /** First key in a block. */ protected byte[] firstKeyInBlock = null; @@ -102,7 +108,9 @@ public abstract class AbstractHFileWriter extends SchemaConfigured public AbstractHFileWriter(CacheConfig cacheConf, FSDataOutputStream outputStream, Path path, int blockSize, - Compression.Algorithm compressAlgo, KeyComparator comparator) { + Compression.Algorithm compressAlgo, + HFileDataBlockEncoder dataBlockEncoder, + KeyComparator comparator) { super(null, path); this.outputStream = outputStream; this.path = path; @@ -110,6 +118,8 @@ public abstract class AbstractHFileWriter extends SchemaConfigured this.blockSize = blockSize; this.compressAlgo = compressAlgo == null ? HFile.DEFAULT_COMPRESSION_ALGORITHM : compressAlgo; + this.blockEncoder = dataBlockEncoder != null + ? 
dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; this.comparator = comparator != null ? comparator : Bytes.BYTES_RAWCOMPARATOR; diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java index b8f007a..3f99758 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java @@ -18,28 +18,36 @@ package org.apache.hadoop.hbase.io.hfile; import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; /** * Cache Key for use with implementations of {@link BlockCache} */ public class BlockCacheKey implements HeapSize { - private String hfileName; - private long offset; + private final String hfileName; + private final long offset; + private final DataBlockEncoding encoding; + public BlockCacheKey(String file, long offset, DataBlockEncoding encoding) { + this.hfileName = file; + this.offset = offset; + this.encoding = encoding; + } /** * Construct a new BlockCacheKey * @param file The name of the HFile this block belongs to. * @param offset Offset of the block into the file */ public BlockCacheKey(String file, long offset) { - this.hfileName = file; - this.offset = offset; + this(file, offset, DataBlockEncoding.NONE); } @Override public int hashCode() { - return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32)); + return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32)) + + encoding.ordinal() * 17; } @Override @@ -56,14 +64,18 @@ public class BlockCacheKey implements HeapSize { @Override public String toString() { - return hfileName + "_" + offset; + return hfileName + "_" + offset + + (encoding == DataBlockEncoding.NONE ? "" : "_" + encoding); } - // Strings have two bytes per character due to default - // Java unicode encoding (hence the times 2). + /** + * Strings have two bytes per character due to default Java Unicode encoding + * (hence length times 2). + */ @Override public long heapSize() { - return 2 * hfileName.length() + Bytes.SIZEOF_LONG; + return ClassSize.align(ClassSize.OBJECT + 2 * hfileName.length() + + Bytes.SIZEOF_LONG + 2 * ClassSize.REFERENCE); } // can't avoid this unfortunately diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java index 995e28a..206fce9 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java @@ -39,6 +39,14 @@ public enum BlockType { /** Data block, both versions */ DATA("DATABLK*", BlockCategory.DATA), + /** An encoded data block (e.g. with prefix compression), version 2 */ + ENCODED_DATA("DATABLKE", BlockCategory.DATA) { + @Override + public int getId() { + return DATA.ordinal(); + } + }, + /** Version 2 leaf index block. Appears in the data block section */ LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX), @@ -103,6 +111,17 @@ public enum BlockType { assert magic.length == MAGIC_LENGTH; } + /** + * Use this instead of {@link #ordinal()}. They work exactly the same, except + * DATA and ENCODED_DATA get the same id using this method (overridden for + * {@link #ENCODED_DATA}). + * @return block type id from 0 to the number of block types - 1 + */ + public int getId() { + // Default implementation, can be overridden for individual enum members. 
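+ // For example, ENCODED_DATA overrides this method so that
+ // ENCODED_DATA.getId() == DATA.getId(), even though their ordinals differ.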
+ return ordinal(); + } + public void writeToStream(OutputStream out) throws IOException { out.write(magic); } diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index ace20ba..bdb7cf9 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KeyComparator; import org.apache.hadoop.hbase.io.HbaseMapWritable; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.SchemaAware; import org.apache.hadoop.hbase.util.BloomFilterWriter; @@ -243,6 +244,7 @@ public class HFile { public abstract Writer createWriter(FileSystem fs, Path path, int blockSize, Compression.Algorithm compress, + HFileDataBlockEncoder dataBlockEncoder, final KeyComparator comparator) throws IOException; public abstract Writer createWriter(FileSystem fs, Path path, @@ -368,11 +370,14 @@ public class HFile { /** Close method with optional evictOnClose */ void close(boolean evictOnClose) throws IOException; + + DataBlockEncoding getEncodingOnDisk(); } private static Reader pickReaderVersion(Path path, FSDataInputStream fsdis, - long size, boolean closeIStream, CacheConfig cacheConf) - throws IOException { + long size, boolean closeIStream, CacheConfig cacheConf, + DataBlockEncoding preferredEncodingInCache) + throws IOException { FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, size); switch (trailer.getVersion()) { case 1: @@ -380,23 +385,34 @@ public class HFile { cacheConf); case 2: return new HFileReaderV2(path, trailer, fsdis, size, closeIStream, - cacheConf); + cacheConf, preferredEncodingInCache); default: throw new IOException("Cannot instantiate reader for HFile version " + trailer.getVersion()); } } - public static Reader createReader(FileSystem fs, Path path, - CacheConfig cacheConf) throws IOException { + public static Reader createReaderWithEncoding( + FileSystem fs, Path path, CacheConfig cacheConf, + DataBlockEncoding preferredEncodingInCache) throws IOException { + final boolean closeIStream = true; return pickReaderVersion(path, fs.open(path), - fs.getFileStatus(path).getLen(), true, cacheConf); + fs.getFileStatus(path).getLen(), closeIStream, cacheConf, + preferredEncodingInCache); + } + + public static Reader createReader( + FileSystem fs, Path path, CacheConfig cacheConf) throws IOException { + return createReaderWithEncoding(fs, path, cacheConf, + DataBlockEncoding.NONE); } - public static Reader createReader(Path path, FSDataInputStream fsdis, - long size, CacheConfig cacheConf) + public static Reader createReaderFromStream(Path path, + FSDataInputStream fsdis, long size, CacheConfig cacheConf) throws IOException { - return pickReaderVersion(path, fsdis, size, false, cacheConf); + final boolean closeIStream = false; + return pickReaderVersion(path, fsdis, size, closeIStream, cacheConf, + DataBlockEncoding.NONE); } /* diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 2a04e9b..3a04fcc 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -17,6 +17,9 @@ */ package 
org.apache.hadoop.hbase.io.hfile; +import static org.apache.hadoop.hbase.io.hfile.BlockType.MAGIC_LENGTH; +import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.NONE; + import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -30,13 +33,14 @@ import java.nio.ByteBuffer; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; - -import org.apache.hadoop.hbase.io.DoubleOutputStream; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; +import org.apache.hadoop.hbase.regionserver.MemStore; import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.CompoundBloomFilter; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Writable; @@ -45,9 +49,6 @@ import org.apache.hadoop.io.compress.Decompressor; import com.google.common.base.Preconditions; -import static org.apache.hadoop.hbase.io.hfile.BlockType.MAGIC_LENGTH; -import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.NONE; - /** * Reading {@link HFile} version 1 and 2 blocks, and writing version 2 blocks. *
    @@ -75,10 +76,26 @@ import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.NONE; */ public class HFileBlock extends SchemaConfigured implements Cacheable { + public static final boolean FILL_HEADER = true; + public static final boolean DONT_FILL_HEADER = false; + /** The size of a version 2 {@link HFile} block header */ public static final int HEADER_SIZE = MAGIC_LENGTH + 2 * Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG; + /** + * We store a two-byte encoder ID at the beginning of every encoded data + * block payload (immediately after the block header). + */ + public static final int DATA_BLOCK_ENCODER_ID_SIZE = Bytes.SIZEOF_SHORT; + + /** + * The size of block header when blockType is {@link BlockType#ENCODED_DATA}. + * This extends normal header by adding the id of encoder. + */ + public static final int ENCODED_HEADER_SIZE = HEADER_SIZE + + DataBlockEncoding.ID_SIZE; + /** Just an array of bytes of the right size. */ public static final byte[] DUMMY_HEADER = new byte[HEADER_SIZE]; @@ -107,10 +124,11 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { }; private BlockType blockType; - private final int onDiskSizeWithoutHeader; + private int onDiskSizeWithoutHeader; private final int uncompressedSizeWithoutHeader; private final long prevBlockOffset; private ByteBuffer buf; + private boolean includesMemstoreTS; /** * The offset of this block in the file. Populated by the reader for @@ -146,7 +164,7 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { */ public HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer buf, - boolean fillHeader, long offset) { + boolean fillHeader, long offset, boolean includesMemstoreTS) { this.blockType = blockType; this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader; this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader; @@ -155,6 +173,7 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { if (fillHeader) overwriteHeader(); this.offset = offset; + this.includesMemstoreTS = includesMemstoreTS; } /** @@ -177,6 +196,15 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { return blockType; } + /** @return get data block encoding id that was used to encode this block */ + public short getDataBlockEncodingId() { + if (blockType != BlockType.ENCODED_DATA) { + throw new IllegalArgumentException("Querying encoder ID of a block " + + "of type other than " + BlockType.ENCODED_DATA + ": " + blockType); + } + return buf.getShort(HEADER_SIZE); + } + /** * @return the on-disk size of the block with header size included */ @@ -421,7 +449,9 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { // On-disk size, uncompressed size, and next block's on-disk size 3 * Bytes.SIZEOF_INT + // This and previous block offset - 2 * Bytes.SIZEOF_LONG + 2 * Bytes.SIZEOF_LONG + + // "Include memstore timestamp" flag + Bytes.SIZEOF_BOOLEAN ); if (buf != null) { @@ -509,29 +539,30 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { /** Compression algorithm for all blocks this instance writes. */ private final Compression.Algorithm compressAlgo; - /** - * The stream we use to accumulate data in the on-disk format for each - * block (i.e. compressed data, or uncompressed if using no compression). - * We reset this stream at the end of each block and reuse it. The header - * is written as the first {@link #HEADER_SIZE} bytes into this stream. 
- */ - private ByteArrayOutputStream baosOnDisk; + /** Data block encoder used for data blocks */ + private final HFileDataBlockEncoder dataBlockEncoder; /** - * The stream we use to accumulate uncompressed block data for - * cache-on-write. Null when cache-on-write is turned off. + * The stream we use to accumulate data in uncompressed format for each + * block. We reset this stream at the end of each block and reuse it. The + * header is written as the first {@link #HEADER_SIZE} bytes into this + * stream. */ private ByteArrayOutputStream baosInMemory; /** Compressor, which is also reused between consecutive blocks. */ private Compressor compressor; - /** Current block type. Set in {@link #startWriting(BlockType)}. */ + /** + * Current block type. Set in {@link #startWriting(BlockType)}. Could be + * changed in {@link #encodeDataBlockForDisk()} from {@link BlockType#DATA} + * to {@link BlockType#ENCODED_DATA}. + */ private BlockType blockType; /** * A stream that we write uncompressed bytes to, which compresses them and - * writes them to {@link #baosOnDisk}. + * writes them to {@link #baosInMemory}. */ private DataOutputStream userDataStream; @@ -542,14 +573,8 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { private byte[] onDiskBytesWithHeader; /** - * The total number of uncompressed bytes written into the current block, - * with header size not included. Valid in the READY state. - */ - private int uncompressedSizeWithoutHeader; - - /** - * Only used when we are using cache-on-write. Valid in the READY state. - * Contains the header and the uncompressed bytes, so the length is + * Valid in the READY state. Contains the header and the uncompressed (but + * potentially encoded, if this is a data block) bytes, so the length is * {@link #uncompressedSizeWithoutHeader} + {@link HFileBlock#HEADER_SIZE}. */ private byte[] uncompressedBytesWithHeader; @@ -566,30 +591,36 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { */ private long[] prevOffsetByType; - /** - * Whether we are accumulating uncompressed bytes for the purpose of - * caching on write. - */ - private boolean cacheOnWrite; - /** The offset of the previous block of the same type */ private long prevOffset; + /** Whether we are including memstore timestamp after every key/value */ + private boolean includesMemstoreTS; + + /** + * Unencoded data block for caching on write. Populated before encoding. + */ + private HFileBlock unencodedDataBlockForCaching; + /** - * @param compressionAlgorithm - * compression algorithm to use + * @param compressionAlgorithm compression algorithm to use + * @param dataBlockEncoderAlgo data block encoding algorithm to use */ - public Writer(Compression.Algorithm compressionAlgorithm) { - compressAlgo = compressionAlgorithm == null ? NONE - : compressionAlgorithm; + public Writer(Compression.Algorithm compressionAlgorithm, + HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS) { + compressAlgo = compressionAlgorithm == null ? NONE : compressionAlgorithm; + this.dataBlockEncoder = dataBlockEncoder != null + ? 
dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; - baosOnDisk = new ByteArrayOutputStream(); + baosInMemory = new ByteArrayOutputStream(); if (compressAlgo != NONE) compressor = compressionAlgorithm.getCompressor(); prevOffsetByType = new long[BlockType.values().length]; for (int i = 0; i < prevOffsetByType.length; ++i) prevOffsetByType[i] = -1; + + this.includesMemstoreTS = includesMemstoreTS; } /** @@ -598,44 +629,26 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { * @return the stream the user can write their data into * @throws IOException */ - public DataOutputStream startWriting(BlockType newBlockType, - boolean cacheOnWrite) throws IOException { + public DataOutputStream startWriting(BlockType newBlockType) + throws IOException { if (state == State.BLOCK_READY && startOffset != -1) { // We had a previous block that was written to a stream at a specific // offset. Save that offset as the last offset of a block of that type. - prevOffsetByType[blockType.ordinal()] = startOffset; + prevOffsetByType[blockType.getId()] = startOffset; } - this.cacheOnWrite = cacheOnWrite; - startOffset = -1; blockType = newBlockType; - baosOnDisk.reset(); - baosOnDisk.write(DUMMY_HEADER); + baosInMemory.reset(); + baosInMemory.write(DUMMY_HEADER); state = State.WRITING; - if (compressAlgo == NONE) { - // We do not need a compression stream or a second uncompressed stream - // for cache-on-write. - userDataStream = new DataOutputStream(baosOnDisk); - } else { - OutputStream compressingOutputStream = - compressAlgo.createCompressionStream(baosOnDisk, compressor, 0); - - if (cacheOnWrite) { - // We save uncompressed data in a cache-on-write mode. - if (baosInMemory == null) - baosInMemory = new ByteArrayOutputStream(); - baosInMemory.reset(); - baosInMemory.write(DUMMY_HEADER); - userDataStream = new DataOutputStream(new DoubleOutputStream( - compressingOutputStream, baosInMemory)); - } else { - userDataStream = new DataOutputStream(compressingOutputStream); - } - } + unencodedDataBlockForCaching = null; + + // We will compress it later in finishBlock() + userDataStream = new DataOutputStream(baosInMemory); return userDataStream; } @@ -662,45 +675,125 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { if (state == State.BLOCK_READY) return; + // This will set state to BLOCK_READY. finishBlock(); - state = State.BLOCK_READY; } /** * An internal method that flushes the compressing stream (if using * compression), serializes the header, and takes care of the separate - * uncompressed stream for caching on write, if applicable. Block writer - * state transitions must be managed by the caller. + * uncompressed stream for caching on write, if applicable. Sets block + * write state to "block ready". */ private void finishBlock() throws IOException { userDataStream.flush(); - uncompressedSizeWithoutHeader = userDataStream.size(); - onDiskBytesWithHeader = baosOnDisk.toByteArray(); - prevOffset = prevOffsetByType[blockType.ordinal()]; - putHeader(onDiskBytesWithHeader, 0); + // This does an array copy, so it is safe to cache this byte array. + uncompressedBytesWithHeader = baosInMemory.toByteArray(); + prevOffset = prevOffsetByType[blockType.getId()]; + + // We need to set state before we can package the block up for + // cache-on-write. In a way, the block is ready, but not yet encoded or + // compressed. 
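As a side note on the block layout this writer produces: each version-2 block starts with a 24-byte header (8-byte block magic, two 4-byte payload sizes, an 8-byte previous-block offset), and the rewritten putHeader(...) a few hunks below stamps both the on-disk and the uncompressed payload sizes into it after encoding and compression have run. The following standalone snippet mirrors that layout under those assumptions; it is an illustrative stand-in, not the patch's own helper:

  import java.nio.ByteBuffer;
  import java.nio.charset.Charset;

  // Illustrative stand-in for the header layout assumed above: 8-byte block
  // magic, on-disk payload size, uncompressed payload size, previous-block
  // offset -- 24 bytes in total. Not the patch's own putHeader().
  public class HeaderSketch {
    static final int MAGIC_LENGTH = 8;
    static final int HEADER_SIZE = MAGIC_LENGTH + 2 * 4 + 8; // 24 bytes

    static byte[] putHeader(String magic, int onDiskSizeWithHeader,
        int uncompressedSizeWithHeader, long prevOffset) {
      ByteBuffer b = ByteBuffer.allocate(HEADER_SIZE);
      b.put(magic.getBytes(Charset.forName("US-ASCII"))); // e.g. "DATABLKE"
      b.putInt(onDiskSizeWithHeader - HEADER_SIZE);       // compressed/encoded payload size
      b.putInt(uncompressedSizeWithHeader - HEADER_SIZE); // payload size after decompression
      b.putLong(prevOffset);                              // previous block of the same type
      return b.array();
    }

    public static void main(String[] args) {
      byte[] header = putHeader("DATABLKE", 1024, 4096, -1L);
      System.out.println(header.length); // 24
    }
  }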
+ state = State.BLOCK_READY; + encodeDataBlockForDisk(); + + doCompression(); + putHeader(uncompressedBytesWithHeader, 0, onDiskBytesWithHeader.length, + uncompressedBytesWithHeader.length); + + if (unencodedDataBlockForCaching != null) { + // We now know the final on-disk size, save it for caching. + unencodedDataBlockForCaching.onDiskSizeWithoutHeader = + getOnDiskSizeWithoutHeader(); + unencodedDataBlockForCaching.overwriteHeader(); + } + } + + /** + * Do compression if it is enabled, or re-use the uncompressed buffer if + * it is not. Fills in the compressed block's header if doing compression. + */ + private void doCompression() throws IOException { + // do the compression + if (compressAlgo != NONE) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(DUMMY_HEADER); + + // compress the data + OutputStream compressingOutputStream = + compressAlgo.createCompressionStream(baos, compressor, 0); + compressingOutputStream.write(uncompressedBytesWithHeader, HEADER_SIZE, + uncompressedBytesWithHeader.length - HEADER_SIZE); + + // finish compression stream + compressingOutputStream.flush(); + + onDiskBytesWithHeader = baos.toByteArray(); + putHeader(onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length, + uncompressedBytesWithHeader.length); + } else { + onDiskBytesWithHeader = uncompressedBytesWithHeader; + } + } - if (cacheOnWrite && compressAlgo != NONE) { - uncompressedBytesWithHeader = baosInMemory.toByteArray(); + /** + * Encodes this block if it is a data block and encoding is turned on in + * {@link #dataBlockEncoder}. + */ + private void encodeDataBlockForDisk() throws IOException { + if (blockType != BlockType.DATA) { + return; // skip any non-data block + } - if (uncompressedSizeWithoutHeader != + // do data block encoding, if data block encoder is set + ByteBuffer rawKeyValues = ByteBuffer.wrap(uncompressedBytesWithHeader, + HEADER_SIZE, uncompressedBytesWithHeader.length - + HEADER_SIZE).slice(); + Pair encodingResult = + dataBlockEncoder.beforeWriteToDisk(rawKeyValues, + includesMemstoreTS); + + BlockType encodedBlockType = encodingResult.getSecond(); + if (encodedBlockType == BlockType.ENCODED_DATA) { + // Save the unencoded block in case we need to cache it on write. + // We don't know the final on-disk size at this point, because + // compression has not been done yet, to set it to uncompressed size + // and override later. + int uncompressedSizeWithoutHeader = getUncompressedSizeWithoutHeader(); + unencodedDataBlockForCaching = new HFileBlock(blockType, + uncompressedSizeWithoutHeader, // will override this later + uncompressedSizeWithoutHeader, prevOffset, + getUncompressedBufferWithHeader(), FILL_HEADER, startOffset, + includesMemstoreTS); + uncompressedBytesWithHeader = encodingResult.getFirst().array(); + blockType = encodedBlockType; + } else { + // There is no encoding configured. Do some extra sanity-checking. + if (encodedBlockType != BlockType.DATA) { + throw new IOException("Unexpected block type coming out of data " + + "block encoder: " + encodedBlockType); + } + if (userDataStream.size() != uncompressedBytesWithHeader.length - HEADER_SIZE) { throw new IOException("Uncompressed size mismatch: " - + uncompressedSizeWithoutHeader + " vs. " + + userDataStream.size() + " vs. " + (uncompressedBytesWithHeader.length - HEADER_SIZE)); } - - // Write the header into the beginning of the uncompressed byte array. - putHeader(uncompressedBytesWithHeader, 0); } } - /** Put the header into the given byte array at the given offset. 
*/ - private void putHeader(byte[] dest, int offset) { + /** + * Put the header into the given byte array at the given offset. + * @param onDiskSize size of the block on disk + * @param uncompressedSize size of the block after decompression (but + * before optional data block decoding) + */ + private void putHeader(byte[] dest, int offset, int onDiskSize, + int uncompressedSize) { offset = blockType.put(dest, offset); - offset = Bytes.putInt(dest, offset, onDiskBytesWithHeader.length - - HEADER_SIZE); - offset = Bytes.putInt(dest, offset, uncompressedSizeWithoutHeader); + offset = Bytes.putInt(dest, offset, onDiskSize - HEADER_SIZE); + offset = Bytes.putInt(dest, offset, uncompressedSize - HEADER_SIZE); Bytes.putLong(dest, offset, prevOffset); } @@ -793,7 +886,7 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { */ public int getUncompressedSizeWithoutHeader() { expectState(State.BLOCK_READY); - return uncompressedSizeWithoutHeader; + return uncompressedBytesWithHeader.length - HEADER_SIZE; } /** @@ -801,7 +894,7 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { */ public int getUncompressedSizeWithHeader() { expectState(State.BLOCK_READY); - return uncompressedSizeWithoutHeader + HEADER_SIZE; + return uncompressedBytesWithHeader.length; } /** @return true if a block is being written */ @@ -832,15 +925,6 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { private byte[] getUncompressedDataWithHeader() { expectState(State.BLOCK_READY); - if (compressAlgo == NONE) - return onDiskBytesWithHeader; - - if (!cacheOnWrite) - throw new IllegalStateException("Cache-on-write is turned off"); - - if (uncompressedBytesWithHeader == null) - throw new NullPointerException(); - return uncompressedBytesWithHeader; } @@ -874,14 +958,18 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { */ public void writeBlock(BlockWritable bw, FSDataOutputStream out) throws IOException { - bw.writeToBlock(startWriting(bw.getBlockType(), false)); + bw.writeToBlock(startWriting(bw.getBlockType())); writeHeaderAndData(out); } public HFileBlock getBlockForCaching() { - return new HFileBlock(blockType, onDiskBytesWithHeader.length - - HEADER_SIZE, uncompressedSizeWithoutHeader, prevOffset, - getUncompressedBufferWithHeader(), false, startOffset); + if (unencodedDataBlockForCaching != null) { + return unencodedDataBlockForCaching; + } + return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(), + getUncompressedSizeWithoutHeader(), prevOffset, + getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset, + includesMemstoreTS); } } @@ -1214,7 +1302,8 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { // to the size of the data portion of the block without the magic record, // since the magic record gets moved to the header. HFileBlock b = new HFileBlock(newBlockType, onDiskSizeWithoutHeader, - uncompressedSizeWithMagic - MAGIC_LENGTH, -1L, buf, true, offset); + uncompressedSizeWithMagic - MAGIC_LENGTH, -1L, buf, FILL_HEADER, + offset, MemStore.NO_PERSISTENT_TS); return b; } } @@ -1232,6 +1321,13 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { /** Reads version 2 blocks from the filesystem. 
*/ public static class FSReaderV2 extends AbstractFSReader { + /** Whether we include memstore timestamp in data blocks */ + protected boolean includesMemstoreTS; + + /** Data block encoding used to read from file */ + protected HFileDataBlockEncoder dataBlockEncoder = + NoOpDataBlockEncoder.INSTANCE; + private ThreadLocal prefetchedHeaderForThread = new ThreadLocal() { @Override @@ -1438,6 +1534,8 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { } } } + + b.includesMemstoreTS = includesMemstoreTS; b.offset = offset; return b; } @@ -1451,6 +1549,13 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { prefetchedHeader.header, 0, HEADER_SIZE); } + void setIncludesMemstoreTS(boolean enabled) { + includesMemstoreTS = enabled; + } + + void setDataBlockEncoder(HFileDataBlockEncoder encoder) { + this.dataBlockEncoder = encoder; + } } @Override @@ -1518,5 +1623,16 @@ public class HFileBlock extends SchemaConfigured implements Cacheable { return true; } + public boolean doesIncludeMemstoreTS() { + return includesMemstoreTS; + } + + public DataBlockEncoding getDataBlockEncoding() { + if (blockType == BlockType.ENCODED_DATA) { + return DataBlockEncoding.getEncodingById(getDataBlockEncodingId()); + } + return DataBlockEncoding.NONE; + } + } diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java index 3f6ccb6..033eb87 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java @@ -210,7 +210,8 @@ public class HFileBlockIndex { } // Found a data block, break the loop and check our level in the tree. - if (block.getBlockType().equals(BlockType.DATA)) { + if (block.getBlockType().equals(BlockType.DATA) || + block.getBlockType().equals(BlockType.ENCODED_DATA)) { break; } @@ -733,8 +734,8 @@ public class HFileBlockIndex { long rootLevelIndexPos = out.getPos(); { - DataOutput blockStream = blockWriter.startWriting(BlockType.ROOT_INDEX, - false); + DataOutput blockStream = + blockWriter.startWriting(BlockType.ROOT_INDEX); rootChunk.writeRoot(blockStream); if (midKeyMetadata != null) blockStream.write(midKeyMetadata); @@ -829,7 +830,7 @@ public class HFileBlockIndex { BlockIndexChunk parent, BlockIndexChunk curChunk) throws IOException { long beginOffset = out.getPos(); DataOutputStream dos = blockWriter.startWriting( - BlockType.INTERMEDIATE_INDEX, cacheOnWrite()); + BlockType.INTERMEDIATE_INDEX); curChunk.writeNonRoot(dos); byte[] curFirstKey = curChunk.getBlockKey(0); blockWriter.writeHeaderAndData(out); diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java new file mode 100644 index 0000000..20fca2d --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Pair; + +/** + * Controls what kind of data block encoding is used. If data block encoding is + * not set or the given block is not a data block (encoded or not), methods + * should just return the unmodified block. + */ +public interface HFileDataBlockEncoder { + /** + * Converts a block from the on-disk format to the in-cache format. Called in + * the following cases: + *
+ * <ul>
+ * <li>After an encoded or unencoded data block is read from disk, but before
+ * it is put into the cache.</li>
+ * <li>To convert brand-new blocks to the in-cache format when doing
+ * cache-on-write.</li>
+ * </ul>
    + * @param block a block in an on-disk format (read from HFile or freshly + * generated). + * @return non null block which is coded according to the settings. + */ + public HFileBlock diskToCacheFormat(HFileBlock block, + boolean isCompaction); + + /** + * Should be called before an encoded or unencoded data block is written to + * disk. + * @param in KeyValues next to each other + * @return a non-null on-heap buffer containing the contents of the + * HFileBlock with unfilled header and block type + */ + public Pair beforeWriteToDisk( + ByteBuffer in, boolean includesMemstoreTS); + + /** + * Decides whether we should use a scanner over encoded blocks. + * @param isCompaction whether we are in a compaction. + * @return Whether to use encoded scanner. + */ + public boolean useEncodedScanner(boolean isCompaction); + + /** + * Save metadata in StoreFile which will be written to disk + * @param storeFileWriter writer for a given StoreFile + * @exception IOException on disk problems + */ + public void saveMetadata(StoreFile.Writer storeFileWriter) + throws IOException; + + public DataBlockEncoding getEncodingOnDisk(); + public DataBlockEncoding getEncodingInCache(); + +} diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java new file mode 100644 index 0000000..6bc2698 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; + +import com.google.common.base.Preconditions; + +/** + * Do different kinds of data block encoding according to column family + * options. + */ +public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { + private final DataBlockEncoding onDisk; + private final DataBlockEncoding inCache; + + public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) { + this(encoding, encoding); + } + + /** + * Do data block encoding with specified options. + * @param onDisk What kind of data block encoding will be used before writing + * HFileBlock to disk. This must be either the same as inCache or + * {@link DataBlockEncoding#NONE}. 
+ * @param inCache What kind of data block encoding will be used in block + * cache. + */ + public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk, + DataBlockEncoding inCache) { + this.onDisk = onDisk != null ? + onDisk : DataBlockEncoding.NONE; + this.inCache = inCache != null ? + inCache : DataBlockEncoding.NONE; + Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE || + onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " + + "either the same as in-cache encoding (" + inCache + ") or " + + DataBlockEncoding.NONE); + } + + public static HFileDataBlockEncoder createFromFileInfo( + FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache) + throws IOException { + byte[] dataBlockEncodingType = + fileInfo.get(StoreFile.DATA_BLOCK_ENCODING); + if (dataBlockEncodingType == null) { + return NoOpDataBlockEncoder.INSTANCE; + } + + String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); + DataBlockEncoding onDisk; + try { + onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); + } catch (IllegalArgumentException ex) { + throw new IOException("Invalid data block encoding type in file info: " + + dataBlockEncodingStr, ex); + } + + DataBlockEncoding inCache; + if (onDisk == DataBlockEncoding.NONE) { + // This is an "in-cache-only" encoding or fully-unencoded scenario. + // Either way, we use the given encoding (possibly NONE) specified by + // the column family in cache. + inCache = preferredEncodingInCache; + } else { + // Leave blocks in cache encoded the same way as they are on disk. + // If we switch encoding type for the CF or the in-cache-only encoding + // flag, old files will keep their encoding both on disk and in cache, + // but new files will be generated with the new encoding. + inCache = onDisk; + } + return new HFileDataBlockEncoderImpl(onDisk, inCache); + } + + @Override + public void saveMetadata(StoreFile.Writer storeFileWriter) + throws IOException { + storeFileWriter.appendFileInfo(StoreFile.DATA_BLOCK_ENCODING, + onDisk.getNameInBytes()); + } + + @Override + public DataBlockEncoding getEncodingOnDisk() { + return onDisk; + } + + @Override + public DataBlockEncoding getEncodingInCache() { + return inCache; + } + + @Override + public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) { + if (block.getBlockType() == BlockType.DATA) { + if (!useEncodedScanner(isCompaction)) { + // Unencoded block, and we don't want to encode in cache. + return block; + } + // Encode the unencoded block with the in-cache encoding. + return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS()); + } + + if (block.getBlockType() == BlockType.ENCODED_DATA) { + if (block.getDataBlockEncodingId() == onDisk.getId()) { + // The block is already in the desired in-cache encoding. + return block; + } + // We don't want to re-encode a block in a different encoding. The HFile + // reader should have been instantiated in such a way that we would not + // have to do this. 
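The on-disk/in-cache decision implemented by createFromFileInfo(...) and diskToCacheFormat(...) above boils down to a small rule: a file written without encoding may still be cached in the column family's preferred in-cache encoding, while a file that is encoded on disk keeps that encoding in cache so blocks never need re-encoding. A hedged, standalone restatement of that rule with a stand-in enum (not the real DataBlockEncoding):

  // Stand-in enum and helper restating the in-cache encoding choice described
  // above; a reading aid only, not HFileDataBlockEncoderImpl code.
  public class InCacheEncodingSketch {
    enum Encoding { NONE, PREFIX, DIFF, FAST_DIFF }

    // A file written without encoding may still be cached in the column
    // family's preferred in-cache encoding; an encoded file keeps its
    // on-disk encoding in cache so blocks never need re-encoding.
    static Encoding encodingInCache(Encoding onDisk, Encoding preferredInCache) {
      return onDisk == Encoding.NONE ? preferredInCache : onDisk;
    }

    public static void main(String[] args) {
      System.out.println(encodingInCache(Encoding.NONE, Encoding.PREFIX)); // PREFIX
      System.out.println(encodingInCache(Encoding.DIFF, Encoding.PREFIX)); // DIFF
      System.out.println(encodingInCache(Encoding.NONE, Encoding.NONE));   // NONE
    }
  }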
+ throw new AssertionError("Expected on-disk data block encoding " + + onDisk + ", got " + block.getDataBlockEncoding()); + } + return block; + } + + // Precondition: not encoded buffer + // Postcondition: same encoding as onDisk + @Override + public Pair beforeWriteToDisk(ByteBuffer in, + boolean includesMemstoreTS) { + if (onDisk == DataBlockEncoding.NONE) { + // there is no need to encode the block before writing it to disk + return new Pair(in, BlockType.DATA); + } + + ByteBuffer encodedBuffer = encodeBufferToHFileBlockBuffer(in, + onDisk, includesMemstoreTS); + return new Pair(encodedBuffer, + BlockType.ENCODED_DATA); + } + + @Override + public boolean useEncodedScanner(boolean isCompaction) { + if (isCompaction && onDisk == DataBlockEncoding.NONE) { + return false; + } + return inCache != DataBlockEncoding.NONE; + } + + private ByteBuffer encodeBufferToHFileBlockBuffer(ByteBuffer in, + DataBlockEncoding algo, boolean includesMemstoreTS) { + ByteArrayOutputStream encodedStream = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(encodedStream); + DataBlockEncoder encoder = algo.getEncoder(); + try { + encodedStream.write(HFileBlock.DUMMY_HEADER); + algo.writeIdInBytes(dataOut); + encoder.compressKeyValues(dataOut, in, + includesMemstoreTS); + } catch (IOException e) { + throw new RuntimeException(String.format("Bug in data block encoder " + + "'%s', it probably requested too much data", algo.toString()), e); + } + return ByteBuffer.wrap(encodedStream.toByteArray()); + } + + private HFileBlock encodeDataBlock(HFileBlock block, + DataBlockEncoding algo, boolean includesMemstoreTS) { + ByteBuffer compressedBuffer = encodeBufferToHFileBlockBuffer( + block.getBufferWithoutHeader(), algo, includesMemstoreTS); + int sizeWithoutHeader = compressedBuffer.limit() - HFileBlock.HEADER_SIZE; + HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA, + block.getOnDiskSizeWithoutHeader(), + sizeWithoutHeader, block.getPrevBlockOffset(), + compressedBuffer, HFileBlock.FILL_HEADER, block.getOffset(), + includesMemstoreTS); + block.passSchemaMetricsTo(encodedBlock); + return encodedBlock; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" + + inCache + ")"; + } + +} diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index faca02d..b68e2af 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.ByteBloomFilter; @@ -170,6 +171,7 @@ public class HFilePrettyPrinter { conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR)); conf.set("fs.default.name", conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR)); + SchemaMetrics.configureGlobally(conf); try { if (!parseOptions(args)) return 1; diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java index 7d63c4f..07549b4 100644 --- 
src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; -import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.util.Bytes; @@ -42,7 +41,9 @@ import org.apache.hadoop.io.RawComparator; import com.google.common.base.Preconditions; /** - * {@link HFile} reader for version 1. + * {@link HFile} reader for version 1. Does not support data block encoding, + * even in cache only, i.e. HFile v1 blocks are always brought into cache + * unencoded. */ public class HFileReaderV1 extends AbstractHFileReader { private static final Log LOG = LogFactory.getLog(HFileReaderV1.class); @@ -280,7 +281,7 @@ public class HFileReaderV1 extends AbstractHFileReader { } long offset = dataBlockIndexReader.getRootBlockOffset(block); - BlockCacheKey cacheKey = HFile.getBlockCacheKey(name, offset); + BlockCacheKey cacheKey = new BlockCacheKey(name, offset); // For any given block from any given file, synchronize reads for said // block. @@ -297,8 +298,8 @@ public class HFileReaderV1 extends AbstractHFileReader { cacheConf.shouldCacheDataOnRead()); if (cachedBlock != null) { cacheHits.incrementAndGet(); - getSchemaMetrics().updateOnCacheHit(cachedBlock.getBlockType().getCategory(), - isCompaction); + getSchemaMetrics().updateOnCacheHit( + cachedBlock.getBlockType().getCategory(), isCompaction); return cachedBlock.getBufferWithoutHeader(); } // Carry on, please load. @@ -322,7 +323,6 @@ public class HFileReaderV1 extends AbstractHFileReader { - offset, dataBlockIndexReader.getRootBlockDataSize(block), pread); passSchemaMetricsTo(hfileBlock); hfileBlock.expectType(BlockType.DATA); - ByteBuffer buf = hfileBlock.getBufferWithoutHeader(); long delta = System.nanoTime() - startTimeNs; if (pread) { @@ -341,8 +341,7 @@ public class HFileReaderV1 extends AbstractHFileReader { cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory()); } - - return buf; + return hfileBlock.getBufferWithoutHeader(); } } @@ -396,16 +395,101 @@ public class HFileReaderV1 extends AbstractHFileReader { } } + protected abstract static class AbstractScannerV1 + extends AbstractHFileReader.Scanner { + protected final HFileReaderV1 readerV1; + protected int currBlock; + + public AbstractScannerV1(HFileReaderV1 reader, boolean cacheBlocks, + final boolean pread, final boolean isCompaction) { + super(reader, cacheBlocks, pread, isCompaction); + readerV1 = reader; + } + + /** + * Within a loaded block, seek looking for the first key + * that is smaller than (or equal to?) the key we are interested in. + * + * A note on the seekBefore - if you have seekBefore = true, AND the + * first key in the block = key, then you'll get thrown exceptions. + * @param key to find + * @param seekBefore find the key before the exact match. 
+ * @return + */ + protected abstract int blockSeek(byte[] key, int offset, int length, + boolean seekBefore); + + protected abstract void loadBlock(int bloc, boolean rewind) + throws IOException; + + @Override + public int seekTo(byte[] key, int offset, int length) throws IOException { + int b = readerV1.blockContainingKey(key, offset, length); + if (b < 0) return -1; // falls before the beginning of the file! :-( + // Avoid re-reading the same block (that'd be dumb). + loadBlock(b, true); + return blockSeek(key, offset, length, false); + } + + @Override + public int reseekTo(byte[] key, int offset, int length) + throws IOException { + if (blockBuffer != null && currKeyLen != 0) { + ByteBuffer bb = getKey(); + int compared = reader.getComparator().compare(key, offset, + length, bb.array(), bb.arrayOffset(), bb.limit()); + if (compared < 1) { + // If the required key is less than or equal to current key, then + // don't do anything. + return compared; + } + } + + int b = readerV1.blockContainingKey(key, offset, length); + if (b < 0) { + return -1; + } + loadBlock(b, false); + return blockSeek(key, offset, length, false); + } + + @Override + public boolean seekBefore(byte[] key, int offset, int length) + throws IOException { + int b = readerV1.blockContainingKey(key, offset, length); + if (b < 0) + return false; // key is before the start of the file. + + // Question: does this block begin with 'key'? + byte[] firstkKey = reader.getDataBlockIndexReader().getRootBlockKey(b); + if (reader.getComparator().compare(firstkKey, 0, firstkKey.length, + key, offset, length) == 0) { + // Ok the key we're interested in is the first of the block, so go back + // by one. + if (b == 0) { + // we have a 'problem', the key we want is the first of the file. + return false; + } + b--; + // TODO shortcut: seek forward in this block to the last key of the + // block. + } + loadBlock(b, true); + blockSeek(key, offset, length, true); + return true; + } + } + /** * Implementation of {@link HFileScanner} interface. */ - protected static class ScannerV1 extends AbstractHFileReader.Scanner { - private final HFileReaderV1 reader; - private int currBlock; + + protected static class ScannerV1 extends AbstractScannerV1 { + private HFileReaderV1 reader; public ScannerV1(HFileReaderV1 reader, boolean cacheBlocks, final boolean pread, final boolean isCompaction) { - super(cacheBlocks, pread, isCompaction); + super(reader, cacheBlocks, pread, isCompaction); this.reader = reader; } @@ -486,57 +570,7 @@ public class HFileReaderV1 extends AbstractHFileReader { } @Override - public int seekTo(byte[] key) throws IOException { - return seekTo(key, 0, key.length); - } - - @Override - public int seekTo(byte[] key, int offset, int length) throws IOException { - int b = reader.blockContainingKey(key, offset, length); - if (b < 0) return -1; // falls before the beginning of the file! :-( - // Avoid re-reading the same block (that'd be dumb). - loadBlock(b, true); - return blockSeek(key, offset, length, false); - } - - @Override - public int reseekTo(byte[] key) throws IOException { - return reseekTo(key, 0, key.length); - } - - @Override - public int reseekTo(byte[] key, int offset, int length) - throws IOException { - if (blockBuffer != null && currKeyLen != 0) { - ByteBuffer bb = getKey(); - int compared = reader.getComparator().compare(key, offset, - length, bb.array(), bb.arrayOffset(), bb.limit()); - if (compared <= 0) { - // If the required key is less than or equal to current key, then - // don't do anything. 
- return compared; - } - } - - int b = reader.blockContainingKey(key, offset, length); - if (b < 0) { - return -1; - } - loadBlock(b, false); - return blockSeek(key, offset, length, false); - } - - /** - * Within a loaded block, seek looking for the first key - * that is smaller than (or equal to?) the key we are interested in. - * - * A note on the seekBefore - if you have seekBefore = true, AND the - * first key in the block = key, then you'll get thrown exceptions. - * @param key to find - * @param seekBefore find the key before the exact match. - * @return - */ - private int blockSeek(byte[] key, int offset, int length, + protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) { int klen, vlen; int lastLen = 0; @@ -578,37 +612,6 @@ public class HFileReaderV1 extends AbstractHFileReader { } @Override - public boolean seekBefore(byte[] key) throws IOException { - return seekBefore(key, 0, key.length); - } - - @Override - public boolean seekBefore(byte[] key, int offset, int length) - throws IOException { - int b = reader.blockContainingKey(key, offset, length); - if (b < 0) - return false; // key is before the start of the file. - - // Question: does this block begin with 'key'? - byte[] firstkKey = reader.getDataBlockIndexReader().getRootBlockKey(b); - if (reader.getComparator().compare(firstkKey, 0, firstkKey.length, - key, offset, length) == 0) { - // Ok the key we're interested in is the first of the block, so go back - // by one. - if (b == 0) { - // we have a 'problem', the key we want is the first of the file. - return false; - } - b--; - // TODO shortcut: seek forward in this block to the last key of the - // block. - } - loadBlock(b, true); - blockSeek(key, offset, length, true); - return true; - } - - @Override public String getKeyString() { return Bytes.toStringBinary(blockBuffer.array(), blockBuffer.arrayOffset() + blockBuffer.position(), currKeyLen); @@ -621,11 +624,6 @@ public class HFileReaderV1 extends AbstractHFileReader { } @Override - public Reader getReader() { - return reader; - } - - @Override public boolean seekTo() throws IOException { if (reader.getDataBlockIndexReader().isEmpty()) { return false; @@ -645,7 +643,8 @@ public class HFileReaderV1 extends AbstractHFileReader { return true; } - private void loadBlock(int bloc, boolean rewind) throws IOException { + @Override + protected void loadBlock(int bloc, boolean rewind) throws IOException { if (blockBuffer == null) { blockBuffer = reader.readBlockBuffer(bloc, cacheBlocks, pread, isCompaction); diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java index 59033f4..b5914d4 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java @@ -30,6 +30,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.util.Bytes; @@ -80,17 +82,20 @@ public class HFileReaderV2 extends AbstractHFileReader { * @param size Length of the stream. * @param closeIStream Whether to close the stream. * @param cacheConf Cache configuration. 
- * @throws IOException + * @param preferredEncodingInCache the encoding to use in cache in case we + * have a choice. If the file is already encoded on disk, we will + * still use its on-disk encoding in cache. */ public HFileReaderV2(Path path, FixedFileTrailer trailer, final FSDataInputStream fsdis, final long size, - final boolean closeIStream, final CacheConfig cacheConf) - throws IOException { + final boolean closeIStream, final CacheConfig cacheConf, + DataBlockEncoding preferredEncodingInCache) + throws IOException { super(path, trailer, fsdis, size, closeIStream, cacheConf); - trailer.expectVersion(2); - fsBlockReader = new HFileBlock.FSReaderV2(fsdis, compressAlgo, - fileSize); + HFileBlock.FSReaderV2 fsBlockReader = new HFileBlock.FSReaderV2(fsdis, + compressAlgo, fileSize); + this.fsBlockReader = fsBlockReader; // Comparator class name is stored in the trailer in version 2. comparator = trailer.createComparator(); @@ -122,9 +127,17 @@ public class HFileReaderV2 extends AbstractHFileReader { lastKey = fileInfo.get(FileInfo.LASTKEY); avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN)); avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN)); - byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION); - includesMemstoreTS = (keyValueFormatVersion != null && - Bytes.toInt(keyValueFormatVersion) == HFileWriterV2.KEY_VALUE_VER_WITH_MEMSTORE); + byte [] keyValueFormatVersion = + fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION); + includesMemstoreTS = keyValueFormatVersion != null && + Bytes.toInt(keyValueFormatVersion) == + HFileWriterV2.KEY_VALUE_VER_WITH_MEMSTORE; + fsBlockReader.setIncludesMemstoreTS(includesMemstoreTS); + + // Read data block encoding algorithm name from file info. + dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo, + preferredEncodingInCache); + fsBlockReader.setDataBlockEncoder(dataBlockEncoder); // Store all other load-on-open blocks for further consumption. HFileBlock b; @@ -145,9 +158,15 @@ public class HFileReaderV2 extends AbstractHFileReader { * @param isCompaction is scanner being used for a compaction? * @return Scanner on this file. */ - @Override - public HFileScanner getScanner(boolean cacheBlocks, final boolean pread, + @Override + public HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction) { + // check if we want to use data block encoding in memory + if (dataBlockEncoder.useEncodedScanner(isCompaction)) { + return new EncodedScannerV2(this, cacheBlocks, pread, isCompaction, + includesMemstoreTS); + } + return new ScannerV2(this, cacheBlocks, pread, isCompaction); } @@ -249,15 +268,16 @@ public class HFileReaderV2 extends AbstractHFileReader { // the other choice is to duplicate work (which the cache would prevent you // from doing). - BlockCacheKey cacheKey = HFile.getBlockCacheKey(name, dataBlockOffset); + BlockCacheKey cacheKey = new BlockCacheKey(name, dataBlockOffset, + dataBlockEncoder.getEncodingInCache()); IdLock.Entry lockEntry = offsetLock.getLockEntry(dataBlockOffset); try { blockLoads.incrementAndGet(); // Check cache for block. If found return. 
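The readBlock(...) changes in this hunk follow a common pattern: probe the block cache under a key that already names the in-cache encoding, and on a miss read the block from disk, run it through diskToCacheFormat(...), and cache the converted block. The following standalone sketch shows that pattern with stand-in types (no HBase classes involved):

  import java.util.Map;
  import java.util.concurrent.ConcurrentHashMap;
  import java.util.function.Supplier;

  // Standalone sketch of the read path described above: probe the cache under
  // a key that already names the in-cache encoding; on a miss, read from disk,
  // convert to the in-cache format, then cache the converted block.
  // All types here are stand-ins, not HBase classes.
  public class ReadPathSketch {
    interface Converter { String diskToCacheFormat(String rawBlock); }

    private final Map<String, String> blockCache = new ConcurrentHashMap<String, String>();

    String readBlock(String cacheKey, Converter encoder, Supplier<String> readFromDisk) {
      String cached = blockCache.get(cacheKey);
      if (cached != null) {
        return cached;                        // cache hit: already in cache format
      }
      String converted = encoder.diskToCacheFormat(readFromDisk.get());
      blockCache.put(cacheKey, converted);    // cache the converted block, not the raw one
      return converted;
    }

    public static void main(String[] args) {
      ReadPathSketch r = new ReadPathSketch();
      Converter prefixInCache = raw -> "PREFIX(" + raw + ")";
      String key = "hfile1_0_PREFIX";         // file + offset + in-cache encoding
      System.out.println(r.readBlock(key, prefixInCache, () -> "raw-bytes")); // miss, converts
      System.out.println(r.readBlock(key, prefixInCache, () -> "raw-bytes")); // hit
    }
  }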
if (cacheConf.isBlockCacheEnabled()) { - HFileBlock cachedBlock = - (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock); + HFileBlock cachedBlock = (HFileBlock) + cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock); if (cachedBlock != null) { BlockCategory blockCategory = cachedBlock.getBlockType().getCategory(); @@ -265,8 +285,9 @@ public class HFileReaderV2 extends AbstractHFileReader { getSchemaMetrics().updateOnCacheHit(blockCategory, isCompaction); - if (cachedBlock.getBlockType() == BlockType.DATA) + if (cachedBlock.getBlockType() == BlockType.DATA) { HFile.dataBlockReadCnt.incrementAndGet(); + } return cachedBlock; } // Carry on, please load. @@ -276,6 +297,8 @@ public class HFileReaderV2 extends AbstractHFileReader { long startTimeNs = System.nanoTime(); HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, -1, pread); + hfileBlock = dataBlockEncoder.diskToCacheFormat(hfileBlock, + isCompaction); passSchemaMetricsTo(hfileBlock); BlockCategory blockCategory = hfileBlock.getBlockType().getCategory(); @@ -345,31 +368,160 @@ public class HFileReaderV2 extends AbstractHFileReader { } } + protected abstract static class AbstractScannerV2 + extends AbstractHFileReader.Scanner { + protected HFileBlock block; + + public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks, + final boolean pread, final boolean isCompaction) { + super(r, cacheBlocks, pread, isCompaction); + } + + /** + * An internal API function. Seek to the given key, optionally rewinding to + * the first key of the block before doing the seek. + * + * @param key key byte array + * @param offset key offset in the key byte array + * @param length key length + * @param rewind whether to rewind to the first key of the block before + * doing the seek. If this is false, we are assuming we never go + * back, otherwise the result is undefined. + * @return -1 if the key is earlier than the first key of the file, + * 0 if we are at the given key, and 1 if we are past the given key + * @throws IOException + */ + protected int seekTo(byte[] key, int offset, int length, boolean rewind) + throws IOException { + HFileBlockIndex.BlockIndexReader indexReader = + reader.getDataBlockIndexReader(); + HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length, + block, cacheBlocks, pread, isCompaction); + if (seekToBlock == null) { + // This happens if the key e.g. falls before the beginning of the file. + return -1; + } + return loadBlockAndSeekToKey(seekToBlock, rewind, key, offset, length, + false); + } + + protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock); + + protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, + boolean rewind, byte[] key, int offset, int length, boolean seekBefore) + throws IOException; + + @Override + public int seekTo(byte[] key, int offset, int length) throws IOException { + // Always rewind to the first key of the block, because the given key + // might be before or after the current key. + return seekTo(key, offset, length, true); + } + + @Override + public int reseekTo(byte[] key, int offset, int length) throws IOException { + if (isSeeked()) { + ByteBuffer bb = getKey(); + int compared = reader.getComparator().compare(key, offset, + length, bb.array(), bb.arrayOffset(), bb.limit()); + if (compared < 1) { + // If the required key is less than or equal to current key, then + // don't do anything. 
+ return compared; + } + } + + // Don't rewind on a reseek operation, because reseek implies that we are + // always going forward in the file. + return seekTo(key, offset, length, false); + } + + @Override + public boolean seekBefore(byte[] key, int offset, int length) + throws IOException { + HFileBlock seekToBlock = + reader.getDataBlockIndexReader().seekToDataBlock(key, offset, length, + block, cacheBlocks, pread, isCompaction); + if (seekToBlock == null) { + return false; + } + ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock); + if (reader.getComparator().compare(firstKey.array(), + firstKey.arrayOffset(), firstKey.limit(), key, offset, length) == 0) + { + long previousBlockOffset = seekToBlock.getPrevBlockOffset(); + // The key we are interested in + if (previousBlockOffset == -1) { + // we have a 'problem', the key we want is the first of the file. + return false; + } + + // It is important that we compute and pass onDiskSize to the block + // reader so that it does not have to read the header separately to + // figure out the size. + seekToBlock = reader.readBlock(previousBlockOffset, + seekToBlock.getOffset() - previousBlockOffset, cacheBlocks, + pread, isCompaction); + + // TODO shortcut: seek forward in this block to the last key of the + // block. + } + loadBlockAndSeekToKey(seekToBlock, true, key, offset, length, true); + return true; + } + + + /** + * Scans blocks in the "scanned" section of the {@link HFile} until the next + * data block is found. + * + * @return the next block, or null if there are no more data blocks + * @throws IOException + */ + protected HFileBlock readNextDataBlock() throws IOException { + long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); + if (block == null) + return null; + + HFileBlock curBlock = block; + + do { + if (curBlock.getOffset() >= lastDataBlockOffset) + return null; + + if (curBlock.getOffset() < 0) { + throw new IOException("Invalid block file offset: " + block); + } + curBlock = reader.readBlock(curBlock.getOffset() + + curBlock.getOnDiskSizeWithHeader(), + curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread, + isCompaction); + } while (!(curBlock.getBlockType().equals(BlockType.DATA) || + curBlock.getBlockType().equals(BlockType.ENCODED_DATA))); + + return curBlock; + } + } + /** * Implementation of {@link HFileScanner} interface. */ - protected static class ScannerV2 extends AbstractHFileReader.Scanner { - private HFileBlock block; + protected static class ScannerV2 extends AbstractScannerV2 { private HFileReaderV2 reader; public ScannerV2(HFileReaderV2 r, boolean cacheBlocks, final boolean pread, final boolean isCompaction) { - super(cacheBlocks, pread, isCompaction); + super(r, cacheBlocks, pread, isCompaction); this.reader = r; } @Override - public HFileReaderV2 getReader() { - return reader; - } - - @Override public KeyValue getKeyValue() { if (!isSeeked()) return null; - KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset() - + blockBuffer.position()); + KeyValue ret = new KeyValue(blockBuffer.array(), + blockBuffer.arrayOffset() + blockBuffer.position()); if (this.reader.shouldIncludeMemstoreTS()) { ret.setMemstoreTS(currMemstoreTS); } @@ -452,36 +604,6 @@ public class HFileReaderV2 extends AbstractHFileReader { } /** - * Scans blocks in the "scanned" section of the {@link HFile} until the next - * data block is found. 
- * - * @return the next block, or null if there are no more data blocks - * @throws IOException - */ - private HFileBlock readNextDataBlock() throws IOException { - long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); - if (block == null) - return null; - - HFileBlock curBlock = block; - - do { - if (curBlock.getOffset() >= lastDataBlockOffset) - return null; - - if (curBlock.getOffset() < 0) { - throw new IOException("Invalid block file offset: " + block); - } - curBlock = reader.readBlock(curBlock.getOffset() - + curBlock.getOnDiskSizeWithHeader(), - curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread, - isCompaction); - } while (!curBlock.getBlockType().equals(BlockType.DATA)); - - return curBlock; - } - - /** * Positions this scanner at the start of the file. * * @return false if empty file; i.e. a call to next would return false and @@ -517,70 +639,7 @@ public class HFileReaderV2 extends AbstractHFileReader { } @Override - public int seekTo(byte[] key) throws IOException { - return seekTo(key, 0, key.length); - } - - /** - * An internal API function. Seek to the given key, optionally rewinding to - * the first key of the block before doing the seek. - * - * @param key key byte array - * @param offset key offset in the key byte array - * @param length key length - * @param rewind whether to rewind to the first key of the block before - * doing the seek. If this is false, we are assuming we never go - * back, otherwise the result is undefined. - * @return -1 if the key is earlier than the first key of the file, - * 0 if we are at the given key, and 1 if we are past the given key - * @throws IOException - */ - private int seekTo(byte[] key, int offset, int length, boolean rewind) - throws IOException { - HFileBlockIndex.BlockIndexReader indexReader = - reader.getDataBlockIndexReader(); - HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length, - block, cacheBlocks, pread, isCompaction); - - if (seekToBlock == null) { - // This happens if the key e.g. falls before the beginning of the file. - return -1; - } - return loadBlockAndSeekToKey(seekToBlock, rewind, key, offset, length, - false); - } - - @Override - public int seekTo(byte[] key, int offset, int length) throws IOException { - // Always rewind to the first key of the block, because the given key - // might be before or after the current key. - return seekTo(key, offset, length, true); - } - - @Override - public int reseekTo(byte[] key) throws IOException { - return reseekTo(key, 0, key.length); - } - - @Override - public int reseekTo(byte[] key, int offset, int length) throws IOException { - if (isSeeked()) { - ByteBuffer bb = getKey(); - int compared = reader.getComparator().compare(key, offset, - length, bb.array(), bb.arrayOffset(), bb.limit()); - if (compared < 1) { - // If the required key is less than or equal to current key, then - // don't do anything. - return compared; - } - } - - // Don't rewind on a reseek operation, because reseek implies that we are - // always going forward in the file. 
- return seekTo(key, offset, length, false); - } - - private int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind, + protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind, byte[] key, int offset, int length, boolean seekBefore) throws IOException { if (block == null || block.getOffset() != seekToBlock.getOffset()) { @@ -599,6 +658,13 @@ public class HFileReaderV2 extends AbstractHFileReader { */ private void updateCurrBlock(HFileBlock newBlock) { block = newBlock; + + // sanity check + if (block.getBlockType() != BlockType.DATA) { + throw new IllegalStateException("ScannerV2 works only on data " + + "blocks, got " + block.getBlockType()); + } + blockBuffer = block.getBufferWithoutHeader(); readKeyValueLen(); blockFetches++; @@ -611,12 +677,14 @@ public class HFileReaderV2 extends AbstractHFileReader { blockBuffer.reset(); if (this.reader.shouldIncludeMemstoreTS()) { try { - int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position() - + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen; - currMemstoreTS = Bytes.readVLong(blockBuffer.array(), memstoreTSOffset); + int memstoreTSOffset = blockBuffer.arrayOffset() + + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + + currValueLen; + currMemstoreTS = Bytes.readVLong(blockBuffer.array(), + memstoreTSOffset); currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS); } catch (Exception e) { - throw new RuntimeException("Error reading memstoreTS. " + e); + throw new RuntimeException("Error reading memstore timestamp", e); } } @@ -631,7 +699,7 @@ public class HFileReaderV2 extends AbstractHFileReader { } /** - * Within a loaded block, seek looking for the first key that is smaller + * Within a loaded block, seek looking for the last key that is smaller * than (or equal to?) the key we are interested in. * * A note on the seekBefore: if you have seekBefore = true, AND the first @@ -656,12 +724,13 @@ public class HFileReaderV2 extends AbstractHFileReader { blockBuffer.reset(); if (this.reader.shouldIncludeMemstoreTS()) { try { - int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position() - + KEY_VALUE_LEN_SIZE + klen + vlen; - memstoreTS = Bytes.readVLong(blockBuffer.array(), memstoreTSOffset); + int memstoreTSOffset = blockBuffer.arrayOffset() + + blockBuffer.position() + KEY_VALUE_LEN_SIZE + klen + vlen; + memstoreTS = Bytes.readVLong(blockBuffer.array(), + memstoreTSOffset); memstoreTSLen = WritableUtils.getVIntSize(memstoreTS); } catch (Exception e) { - throw new RuntimeException("Error reading memstoreTS. " + e); + throw new RuntimeException("Error reading memstore timestamp", e); } } @@ -713,11 +782,7 @@ public class HFileReaderV2 extends AbstractHFileReader { } @Override - public boolean seekBefore(byte[] key) throws IOException { - return seekBefore(key, 0, key.length); - } - - private ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) { + protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) { ByteBuffer buffer = curBlock.getBufferWithoutHeader(); // It is safe to manipulate this buffer because we own the buffer object. 
buffer.rewind(); @@ -730,53 +795,174 @@ public class HFileReaderV2 extends AbstractHFileReader { } @Override - public boolean seekBefore(byte[] key, int offset, int length) - throws IOException { - HFileBlock seekToBlock = - reader.getDataBlockIndexReader().seekToDataBlock(key, offset, - length, block, cacheBlocks, pread, isCompaction); - if (seekToBlock == null) { + public String getKeyString() { + return Bytes.toStringBinary(blockBuffer.array(), + blockBuffer.arrayOffset() + blockBuffer.position() + + KEY_VALUE_LEN_SIZE, currKeyLen); + } + + @Override + public String getValueString() { + return Bytes.toString(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen, + currValueLen); + } + } + + /** + * ScannerV2 that operates on encoded data blocks. + */ + protected static class EncodedScannerV2 extends AbstractScannerV2 { + private DataBlockEncoder.EncodedSeeker seeker = null; + private DataBlockEncoder dataBlockEncoder = null; + private final boolean includesMemstoreTS; + + public EncodedScannerV2(HFileReaderV2 reader, boolean cacheBlocks, + boolean pread, boolean isCompaction, boolean includesMemstoreTS) { + super(reader, cacheBlocks, pread, isCompaction); + this.includesMemstoreTS = includesMemstoreTS; + } + + private void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) { + this.dataBlockEncoder = dataBlockEncoder; + seeker = dataBlockEncoder.createSeeker(reader.getComparator(), + includesMemstoreTS); + } + + /** + * Updates the current block to be the given {@link HFileBlock}. Seeks to + * the the first key/value pair. + * + * @param newBlock the block to make current + */ + private void updateCurrentBlock(HFileBlock newBlock) { + block = newBlock; + + // sanity checks + if (block.getBlockType() != BlockType.ENCODED_DATA) { + throw new IllegalStateException( + "EncodedScannerV2 works only on encoded data blocks"); + } + + short dataBlockEncoderId = block.getDataBlockEncodingId(); + if (dataBlockEncoder == null || + !DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, + dataBlockEncoderId)) { + DataBlockEncoder encoder = + DataBlockEncoding.getDataBlockEncoderById(dataBlockEncoderId); + setDataBlockEncoder(encoder); + } + + seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); + blockFetches++; + } + + private ByteBuffer getEncodedBuffer(HFileBlock newBlock) { + ByteBuffer origBlock = newBlock.getBufferReadOnly(); + ByteBuffer encodedBlock = ByteBuffer.wrap(origBlock.array(), + origBlock.arrayOffset() + HFileBlock.HEADER_SIZE + + DataBlockEncoding.ID_SIZE, + origBlock.limit() - HFileBlock.HEADER_SIZE - + DataBlockEncoding.ID_SIZE).slice(); + return encodedBlock; + } + + @Override + public boolean seekTo() throws IOException { + if (reader == null) { return false; } - ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock); - if (reader.getComparator().compare(firstKey.array(), - firstKey.arrayOffset(), firstKey.limit(), key, offset, length) == 0) - { - long previousBlockOffset = seekToBlock.getPrevBlockOffset(); - // The key we are interested in - if (previousBlockOffset == -1) { - // we have a 'problem', the key we want is the first of the file. - return false; - } - // It is important that we compute and pass onDiskSize to the block - // reader so that it does not have to read the header separately to - // figure out the size. 
- seekToBlock = reader.readBlock(previousBlockOffset, - seekToBlock.getOffset() - previousBlockOffset, cacheBlocks, - pread, isCompaction); + if (reader.getTrailer().getEntryCount() == 0) { + // No data blocks. + return false; + } - // TODO shortcut: seek forward in this block to the last key of the - // block. + long firstDataBlockOffset = + reader.getTrailer().getFirstDataBlockOffset(); + if (block != null && block.getOffset() == firstDataBlockOffset) { + seeker.rewind(); + return true; } - loadBlockAndSeekToKey(seekToBlock, true, key, offset, length, true); + + block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, + isCompaction); + if (block.getOffset() < 0) { + throw new IOException("Invalid block offset: " + block.getOffset()); + } + updateCurrentBlock(block); return true; } @Override + public boolean next() throws IOException { + boolean isValid = seeker.next(); + if (!isValid) { + block = readNextDataBlock(); + isValid = block != null; + if (isValid) { + updateCurrentBlock(block); + } + } + return isValid; + } + + @Override + public ByteBuffer getKey() { + assertValidSeek(); + return seeker.getKey(); + } + + @Override + public ByteBuffer getValue() { + assertValidSeek(); + return seeker.getValue(); + } + + @Override + public KeyValue getKeyValue() { + if (block == null) { + return null; + } + return seeker.getKeyValue(); + } + + @Override public String getKeyString() { - return Bytes.toStringBinary(blockBuffer.array(), - blockBuffer.arrayOffset() + blockBuffer.position() - + KEY_VALUE_LEN_SIZE, currKeyLen); + ByteBuffer keyBuffer = getKey(); + return Bytes.toStringBinary(keyBuffer.array(), + keyBuffer.arrayOffset(), keyBuffer.limit()); } @Override public String getValueString() { - return Bytes.toString(blockBuffer.array(), blockBuffer.arrayOffset() - + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen, - currValueLen); + ByteBuffer valueBuffer = getValue(); + return Bytes.toStringBinary(valueBuffer.array(), + valueBuffer.arrayOffset(), valueBuffer.limit()); } + private void assertValidSeek() { + if (block == null) { + throw new NotSeekedException(); + } + } + + @Override + protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) { + return dataBlockEncoder.getFirstKeyInBlock(getEncodedBuffer(curBlock)); + } + + @Override + protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind, + byte[] key, int offset, int length, boolean seekBefore) + throws IOException { + if (block == null || block.getOffset() != seekToBlock.getOffset()) { + updateCurrentBlock(seekToBlock); + } else if (rewind) { + seeker.rewind(); + } + return seeker.blockSeekTo(key, offset, length, seekBefore); + } } /** diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java index e58d0cc..be51d6e 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java @@ -35,8 +35,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KeyComparator; +import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; +import org.apache.hadoop.hbase.regionserver.MemStore; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.util.BloomFilterWriter; import 
org.apache.hadoop.hbase.util.Bytes; @@ -44,7 +46,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.compress.Compressor; /** - * Writes version 1 HFiles. Mainly used for testing backwards-compatibilty. + * Writes version 1 HFiles. Mainly used for testing backwards-compatibility. */ public class HFileWriterV1 extends AbstractHFileWriter { @@ -91,16 +93,17 @@ public class HFileWriterV1 extends AbstractHFileWriter { @Override public Writer createWriter(FileSystem fs, Path path, int blockSize, - Compression.Algorithm compressAlgo, final KeyComparator comparator) + Algorithm compressAlgo, HFileDataBlockEncoder dataBlockEncoder, + KeyComparator comparator) throws IOException { return new HFileWriterV1(conf, cacheConf, fs, path, blockSize, - compressAlgo, comparator); + compressAlgo, dataBlockEncoder, comparator); } @Override public Writer createWriter(FileSystem fs, Path path, int blockSize, - String compressAlgoName, - final KeyComparator comparator) throws IOException { + String compressAlgoName, KeyComparator comparator) + throws IOException { return new HFileWriterV1(conf, cacheConf, fs, path, blockSize, compressAlgoName, comparator); } @@ -117,7 +120,8 @@ public class HFileWriterV1 extends AbstractHFileWriter { public Writer createWriter(final FSDataOutputStream ostream, final int blockSize, final Compression.Algorithm compress, final KeyComparator c) throws IOException { - return new HFileWriterV1(cacheConf, ostream, blockSize, compress, c); + return new HFileWriterV1(cacheConf, ostream, blockSize, compress, + NoOpDataBlockEncoder.INSTANCE, c); } } @@ -127,7 +131,7 @@ public class HFileWriterV1 extends AbstractHFileWriter { throws IOException { this(conf, cacheConf, fs, path, HFile.DEFAULT_BLOCKSIZE, HFile.DEFAULT_COMPRESSION_ALGORITHM, - null); + NoOpDataBlockEncoder.INSTANCE, null); } /** @@ -138,15 +142,18 @@ public class HFileWriterV1 extends AbstractHFileWriter { Path path, int blockSize, String compressAlgoName, final KeyComparator comparator) throws IOException { this(conf, cacheConf, fs, path, blockSize, - compressionByName(compressAlgoName), comparator); + compressionByName(compressAlgoName), NoOpDataBlockEncoder.INSTANCE, + comparator); } /** Constructor that takes a path, creates and closes the output stream. */ - public HFileWriterV1(Configuration conf, CacheConfig cacheConf, FileSystem fs, - Path path, int blockSize, Compression.Algorithm compress, + public HFileWriterV1(Configuration conf, CacheConfig cacheConf, + FileSystem fs, Path path, + int blockSize, Compression.Algorithm compress, + HFileDataBlockEncoder blockEncoder, final KeyComparator comparator) throws IOException { super(cacheConf, createOutputStream(conf, fs, path), path, - blockSize, compress, comparator); + blockSize, compress, blockEncoder, comparator); SchemaMetrics.configureGlobally(conf); } @@ -157,15 +164,17 @@ public class HFileWriterV1 extends AbstractHFileWriter { throws IOException { this(cacheConf, outputStream, blockSize, Compression.getCompressionAlgorithmByName(compressAlgoName), - comparator); + NoOpDataBlockEncoder.INSTANCE, comparator); } /** Constructor that takes a stream. 
*/ public HFileWriterV1(CacheConfig cacheConf, final FSDataOutputStream outputStream, final int blockSize, - final Compression.Algorithm compress, final KeyComparator comparator) + final Compression.Algorithm compress, + HFileDataBlockEncoder blockEncoder, final KeyComparator comparator) throws IOException { - super(cacheConf, outputStream, null, blockSize, compress, comparator); + super(cacheConf, outputStream, null, blockSize, compress, + blockEncoder, comparator); } /** @@ -202,13 +211,16 @@ public class HFileWriterV1 extends AbstractHFileWriter { if (cacheConf.shouldCacheDataOnWrite()) { baosDos.flush(); + // we do not do data block encoding on disk for HFile v1 byte[] bytes = baos.toByteArray(); - HFileBlock cBlock = new HFileBlock(BlockType.DATA, + HFileBlock block = new HFileBlock(BlockType.DATA, (int) (outputStream.getPos() - blockBegin), bytes.length, -1, - ByteBuffer.wrap(bytes, 0, bytes.length), true, blockBegin); - passSchemaMetricsTo(cBlock); + ByteBuffer.wrap(bytes, 0, bytes.length), HFileBlock.FILL_HEADER, + blockBegin, MemStore.NO_PERSISTENT_TS); + block = blockEncoder.diskToCacheFormat(block, false); + passSchemaMetricsTo(block); cacheConf.getBlockCache().cacheBlock( - HFile.getBlockCacheKey(name, blockBegin), cBlock); + HFile.getBlockCacheKey(name, blockBegin), block); baosDos.close(); } blockNumber++; diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java index d44a995..6643155 100644 --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java @@ -49,9 +49,13 @@ public class HFileWriterV2 extends AbstractHFileWriter { static final Log LOG = LogFactory.getLog(HFileWriterV2.class); /** Max memstore (mvcc) timestamp in FileInfo */ - public static final byte [] MAX_MEMSTORE_TS_KEY = Bytes.toBytes("MAX_MEMSTORE_TS_KEY"); + public static final byte [] MAX_MEMSTORE_TS_KEY = + Bytes.toBytes("MAX_MEMSTORE_TS_KEY"); + /** KeyValue version in FileInfo */ - public static final byte [] KEY_VALUE_VERSION = Bytes.toBytes("KEY_VALUE_VERSION"); + public static final byte [] KEY_VALUE_VERSION = + Bytes.toBytes("KEY_VALUE_VERSION"); + /** Version for KeyValue which includes memstore timestamp */ public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1; @@ -92,10 +96,10 @@ public class HFileWriterV2 extends AbstractHFileWriter { @Override public Writer createWriter(FileSystem fs, Path path, int blockSize, - Compression.Algorithm compress, + Compression.Algorithm compress, HFileDataBlockEncoder blockEncoder, final KeyComparator comparator) throws IOException { return new HFileWriterV2(conf, cacheConf, fs, path, blockSize, - compress, comparator); + compress, blockEncoder, comparator); } @Override @@ -128,7 +132,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { FileSystem fs, Path path) throws IOException { this(conf, cacheConf, fs, path, HFile.DEFAULT_BLOCKSIZE, - HFile.DEFAULT_COMPRESSION_ALGORITHM, null); + HFile.DEFAULT_COMPRESSION_ALGORITHM, null, null); } /** @@ -139,15 +143,16 @@ public class HFileWriterV2 extends AbstractHFileWriter { Path path, int blockSize, String compressAlgoName, final KeyComparator comparator) throws IOException { this(conf, cacheConf, fs, path, blockSize, - compressionByName(compressAlgoName), comparator); + compressionByName(compressAlgoName), null, comparator); } /** Constructor that takes a path, creates and closes the output stream. 
*/ public HFileWriterV2(Configuration conf, CacheConfig cacheConf, FileSystem fs, Path path, int blockSize, Compression.Algorithm compressAlgo, + HFileDataBlockEncoder blockEncoder, final KeyComparator comparator) throws IOException { super(cacheConf, createOutputStream(conf, fs, path), path, - blockSize, compressAlgo, comparator); + blockSize, compressAlgo, blockEncoder, comparator); SchemaMetrics.configureGlobally(conf); finishInit(conf); } @@ -167,7 +172,8 @@ public class HFileWriterV2 extends AbstractHFileWriter { final FSDataOutputStream outputStream, final int blockSize, final Compression.Algorithm compress, final KeyComparator comparator) throws IOException { - super(cacheConf, outputStream, null, blockSize, compress, comparator); + super(cacheConf, outputStream, null, blockSize, compress, null, + comparator); finishInit(conf); } @@ -177,7 +183,8 @@ public class HFileWriterV2 extends AbstractHFileWriter { throw new IllegalStateException("finishInit called twice"); // HFile filesystem-level (non-caching) block writer - fsBlockWriter = new HFileBlock.Writer(compressAlgo); + fsBlockWriter = new HFileBlock.Writer(compressAlgo, blockEncoder, + includeMemstoreTS); // Data block index writer boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite(); @@ -225,8 +232,9 @@ public class HFileWriterV2 extends AbstractHFileWriter { long startTimeNs = System.nanoTime(); // Update the first data block offset for scanning. - if (firstDataBlockOffset == -1) + if (firstDataBlockOffset == -1) { firstDataBlockOffset = outputStream.getPos(); + } // Update the last data block offset lastDataBlockOffset = outputStream.getPos(); @@ -242,10 +250,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { HFile.writeOps.incrementAndGet(); if (cacheConf.shouldCacheDataOnWrite()) { - HFileBlock blockForCaching = fsBlockWriter.getBlockForCaching(); - passSchemaMetricsTo(blockForCaching); - cacheConf.getBlockCache().cacheBlock( - HFile.getBlockCacheKey(name, lastDataBlockOffset), blockForCaching); + doCacheOnWrite(lastDataBlockOffset); } } @@ -256,32 +261,41 @@ public class HFileWriterV2 extends AbstractHFileWriter { long offset = outputStream.getPos(); boolean cacheThisBlock = ibw.cacheOnWrite(); ibw.writeInlineBlock(fsBlockWriter.startWriting( - ibw.getInlineBlockType(), cacheThisBlock)); + ibw.getInlineBlockType())); fsBlockWriter.writeHeaderAndData(outputStream); ibw.blockWritten(offset, fsBlockWriter.getOnDiskSizeWithHeader(), fsBlockWriter.getUncompressedSizeWithoutHeader()); totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader(); if (cacheThisBlock) { - // Cache this block on write. - HFileBlock cBlock = fsBlockWriter.getBlockForCaching(); - passSchemaMetricsTo(cBlock); - cacheConf.getBlockCache().cacheBlock( - HFile.getBlockCacheKey(name, offset), cBlock); + doCacheOnWrite(offset); } } } } /** + * Caches the last written HFile block. + * @param offset the offset of the block we want to cache. Used to determine + * the cache key. + */ + private void doCacheOnWrite(long offset) { + HFileBlock cacheFormatBlock = blockEncoder.diskToCacheFormat( + fsBlockWriter.getBlockForCaching(), + false); // Not a compaction + passSchemaMetricsTo(cacheFormatBlock); + cacheConf.getBlockCache().cacheBlock( + HFile.getBlockCacheKey(name, offset), cacheFormatBlock); + } + + /** * Ready a new block for writing. * * @throws IOException */ private void newBlock() throws IOException { // This is where the next block begins. 
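As a usage sketch of the new writer-factory signature (the PREFIX encoding constant is assumed here; the factory call mirrors the one StoreFile.Writer makes elsewhere in this patch):

  static HFile.Writer createEncodedWriter(Configuration conf, FileSystem fs,
      Path path) throws IOException {
    // On-disk and in-cache encodings may differ; this example uses PREFIX for both.
    HFileDataBlockEncoder encoder = new HFileDataBlockEncoderImpl(
        DataBlockEncoding.PREFIX, DataBlockEncoding.PREFIX);
    return HFile.getWriterFactory(conf, new CacheConfig(conf)).createWriter(
        fs, path, HFile.DEFAULT_BLOCKSIZE, Compression.Algorithm.NONE,
        encoder, KeyValue.COMPARATOR.getRawComparator());
  }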
- fsBlockWriter.startWriting(BlockType.DATA, - cacheConf.shouldCacheDataOnWrite()); + fsBlockWriter.startWriting(BlockType.DATA); firstKeyInBlock = null; } @@ -413,8 +427,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { // store the beginning offset long offset = outputStream.getPos(); // write the metadata content - DataOutputStream dos = fsBlockWriter.startWriting(BlockType.META, - cacheConf.shouldCacheDataOnWrite()); + DataOutputStream dos = fsBlockWriter.startWriting(BlockType.META); metaData.get(i).write(dos); fsBlockWriter.writeHeaderAndData(outputStream); @@ -440,7 +453,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { // Meta block index. metaBlockIndexWriter.writeSingleLevelIndex(fsBlockWriter.startWriting( - BlockType.ROOT_INDEX, false), "meta"); + BlockType.ROOT_INDEX), "meta"); fsBlockWriter.writeHeaderAndData(outputStream); totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader(); @@ -450,8 +463,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { } // File info - writeFileInfo(trailer, fsBlockWriter.startWriting(BlockType.FILE_INFO, - false)); + writeFileInfo(trailer, fsBlockWriter.startWriting(BlockType.FILE_INFO)); fsBlockWriter.writeHeaderAndData(outputStream); totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader(); diff --git src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java new file mode 100644 index 0000000..af7b509 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Pair; + +/** + * Does not perform any kind of encoding/decoding. + */ +public class NoOpDataBlockEncoder implements HFileDataBlockEncoder { + + public static final NoOpDataBlockEncoder INSTANCE = + new NoOpDataBlockEncoder(); + + /** Cannot be instantiated. Use {@link #INSTANCE} instead. 
*/ + private NoOpDataBlockEncoder() { + } + + @Override + public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) { + if (block.getBlockType() == BlockType.ENCODED_DATA) { + throw new IllegalStateException("Unexpected encoded block"); + } + return block; + } + + @Override + public Pair beforeWriteToDisk( + ByteBuffer in, boolean includesMemstoreTS) { + return new Pair(in, BlockType.DATA); + } + + @Override + public boolean useEncodedScanner(boolean isCompaction) { + return false; + } + + @Override + public void saveMetadata(StoreFile.Writer storeFileWriter) { + } + + @Override + public DataBlockEncoding getEncodingOnDisk() { + return DataBlockEncoding.NONE; + } + + @Override + public DataBlockEncoding getEncodingInCache() { + return DataBlockEncoding.NONE; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } + +} diff --git src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java index bd574b2..d1f6b56 100644 --- src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java +++ src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java @@ -63,8 +63,11 @@ import org.apache.hadoop.hbase.client.ServerCallable; import org.apache.hadoop.hbase.io.HalfStoreFileReader; import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.io.Reference.Range; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -528,9 +531,12 @@ public class LoadIncrementalHFiles extends Configured implements Tool { CacheConfig cacheConf = new CacheConfig(conf); HalfStoreFileReader halfReader = null; StoreFile.Writer halfWriter = null; + HFileDataBlockEncoder dataBlockEncoder = new HFileDataBlockEncoderImpl( + familyDescriptor.getDataBlockEncodingOnDisk(), + familyDescriptor.getDataBlockEncoding()); try { halfReader = new HalfStoreFileReader(fs, inFile, cacheConf, - reference); + reference, DataBlockEncoding.NONE); Map fileInfo = halfReader.loadFileInfo(); int blocksize = familyDescriptor.getBlocksize(); @@ -538,7 +544,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool { BloomType bloomFilterType = familyDescriptor.getBloomFilterType(); halfWriter = new StoreFile.Writer( - fs, outFile, blocksize, compression, conf, cacheConf, + fs, outFile, blocksize, compression, dataBlockEncoder, + conf, cacheConf, KeyValue.COMPARATOR, bloomFilterType, 0); HFileScanner scanner = halfReader.getScanner(false, false, false); scanner.seekTo(); @@ -638,7 +645,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool { Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); for (Path hfile : hfiles) { if (hfile.getName().startsWith("_")) continue; - HFile.Reader reader = HFile.createReader(fs, hfile, new CacheConfig(getConf())); final byte[] first, last; diff --git src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java index 6f28227..066ae52 100644 --- 
src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java @@ -79,7 +79,7 @@ public class CompactSplitThread implements CompactionRequestor { "hbase.regionserver.thread.compaction.throttle", 0); } else { // we have a complicated default. see HBASE-3877 - long flushSize = conf.getLong("hbase.hregion.memstore.flush.size", + long flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE); long splitSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); diff --git src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index fb4ec05..e98e1e9 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -479,7 +479,7 @@ public class HRegion implements HeapSize { // , Writable{ long flushSize = this.htableDescriptor.getMemStoreFlushSize(); if (flushSize == HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE) { - flushSize = conf.getLong("hbase.hregion.memstore.flush.size", + flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE); } this.memstoreFlushSize = flushSize; diff --git src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java index 50e7fe0..03ed7f1 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java @@ -879,6 +879,9 @@ public class MemStore implements HeapSize { ClassSize.COPYONWRITE_ARRAYSET + ClassSize.COPYONWRITE_ARRAYLIST + (2 * ClassSize.CONCURRENT_SKIPLISTMAP)); + /** Used for readability when we don't store memstore timestamp in HFile */ + public static final boolean NO_PERSISTENT_TS = false; + /* * Calculate how the MemStore size has changed. Includes overhead of the * backing Map. 
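To spell out the HFileDataBlockEncoder contract that NoOpDataBlockEncoder implements earlier in this patch: before a block goes to disk, the writer hands the encoder the uncompressed block payload and gets back the bytes to persist plus the block type to record. A rough sketch, with the Pair generics written out for readability:

  static void encodeForDisk(ByteBuffer uncompressedBlock,
      boolean includesMemstoreTS) {
    HFileDataBlockEncoder encoder = NoOpDataBlockEncoder.INSTANCE;
    Pair<ByteBuffer, BlockType> result =
        encoder.beforeWriteToDisk(uncompressedBlock, includesMemstoreTS);
    ByteBuffer bytesToWrite = result.getFirst();   // unchanged for the no-op encoder
    BlockType typeToRecord = result.getSecond();   // stays BlockType.DATA here
  }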
diff --git src/main/java/org/apache/hadoop/hbase/regionserver/Store.java src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index b928731..b2512cc 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -48,9 +48,12 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.io.hfile.InvalidHFileException; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.StoreScanner.ScanType; import org.apache.hadoop.hbase.regionserver.compactions.CompactSelection; @@ -144,6 +147,7 @@ public class Store extends SchemaConfigured implements HeapSize { private final Compression.Algorithm compression; /** Compression algorithm for major compaction */ private final Compression.Algorithm compactionCompression; + private HFileDataBlockEncoder dataBlockEncoder; // Comparing KeyValues final KeyValue.KVComparator comparator; @@ -181,6 +185,11 @@ public class Store extends SchemaConfigured implements HeapSize { this.compactionCompression = (family.getCompactionCompression() != Compression.Algorithm.NONE) ? family.getCompactionCompression() : this.compression; + + this.dataBlockEncoder = + new HFileDataBlockEncoderImpl(family.getDataBlockEncodingOnDisk(), + family.getDataBlockEncoding()); + this.comparator = info.getComparator(); // getTimeToLive returns ttl in seconds. Convert to milliseconds. this.ttl = family.getTimeToLive(); @@ -270,6 +279,21 @@ public class Store extends SchemaConfigured implements HeapSize { public Path getHomedir() { return homedir; } + + /** + * @return the data block encoder + */ + public HFileDataBlockEncoder getDataBlockEncoder() { + return dataBlockEncoder; + } + + /** + * Should be used only in tests. + * @param blockEncoder the block delta encoder to use + */ + void setDataBlockEncoderInTest(HFileDataBlockEncoder blockEncoder) { + this.dataBlockEncoder = blockEncoder; + } /* * Creates an unsorted list of StoreFile loaded from the given directory. @@ -292,8 +316,9 @@ public class Store extends SchemaConfigured implements HeapSize { continue; } StoreFile curfile = new StoreFile(fs, p, this.conf, this.cacheConf, - this.family.getBloomFilterType()); + this.family.getBloomFilterType(), this.dataBlockEncoder); passSchemaMetricsTo(curfile); + curfile.createReader(); long length = curfile.getReader().length(); this.storeSize += length; @@ -447,8 +472,9 @@ public class Store extends SchemaConfigured implements HeapSize { StoreFile.rename(fs, srcPath, dstPath); StoreFile sf = new StoreFile(fs, dstPath, this.conf, this.cacheConf, - this.family.getBloomFilterType()); + this.family.getBloomFilterType(), this.dataBlockEncoder); passSchemaMetricsTo(sf); + sf.createReader(); LOG.info("Moved hfile " + srcPath + " into store directory " + @@ -555,7 +581,6 @@ public class Store extends SchemaConfigured implements HeapSize { MonitoredTask status) throws IOException { StoreFile.Writer writer; - String fileName; // Find the smallest read point across all the Scanners. 
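For reference, a condensed sketch of how the two column-family settings travel from the table descriptor into the store's block encoder (the FAST_DIFF value and helper names are illustrative assumptions):

  static HColumnDescriptor encodedFamily(byte[] familyName) {
    HColumnDescriptor family = new HColumnDescriptor(familyName);
    family.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); // encoding kept in cache
    family.setEncodeOnDisk(true);                             // also encode blocks in HFiles
    return family;
  }

  static HFileDataBlockEncoder encoderFor(HColumnDescriptor family) {
    // On-disk and in-cache encodings are configured independently.
    return new HFileDataBlockEncoderImpl(
        family.getDataBlockEncodingOnDisk(),
        family.getDataBlockEncoding());
  }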
long smallestReadPoint = region.getSmallestReadPoint(); long flushed = 0; @@ -651,8 +676,9 @@ public class Store extends SchemaConfigured implements HeapSize { status.setStatus("Flushing " + this + ": reopening flushed file"); StoreFile sf = new StoreFile(this.fs, dstPath, this.conf, this.cacheConf, - this.family.getBloomFilterType()); + this.family.getBloomFilterType(), this.dataBlockEncoder); passSchemaMetricsTo(sf); + StoreFile.Reader r = sf.createReader(); this.storeSize += r.length(); this.totalUncompressedBytes += r.getTotalUncompressedBytes(); @@ -690,7 +716,7 @@ public class Store extends SchemaConfigured implements HeapSize { Compression.Algorithm compression) throws IOException { StoreFile.Writer w = StoreFile.createWriter(fs, region.getTmpDir(), - blocksize, compression, comparator, conf, cacheConf, + blocksize, compression, dataBlockEncoder, comparator, conf, cacheConf, family.getBloomFilterType(), maxKeyCount); // The store file writer's path does not include the CF name, so we need // to configure the HFile writer directly. @@ -1299,7 +1325,8 @@ public class Store extends SchemaConfigured implements HeapSize { LOG.debug("Compacting " + file + ", keycount=" + keyCount + ", bloomtype=" + r.getBloomFilterType().toString() + - ", size=" + StringUtils.humanReadableInt(r.length()) ); + ", size=" + StringUtils.humanReadableInt(r.length()) + + ", encoding=" + r.getHFileReader().getEncodingOnDisk()); } } // For major compactions calculate the earliest put timestamp @@ -1416,7 +1443,8 @@ public class Store extends SchemaConfigured implements HeapSize { StoreFile storeFile = null; try { storeFile = new StoreFile(this.fs, path, this.conf, - this.cacheConf, this.family.getBloomFilterType()); + this.cacheConf, this.family.getBloomFilterType(), + NoOpDataBlockEncoder.INSTANCE); passSchemaMetricsTo(storeFile); storeFile.createReader(); } catch (IOException e) { @@ -1468,7 +1496,7 @@ public class Store extends SchemaConfigured implements HeapSize { " to " + destPath); } result = new StoreFile(this.fs, destPath, this.conf, this.cacheConf, - this.family.getBloomFilterType()); + this.family.getBloomFilterType(), this.dataBlockEncoder); passSchemaMetricsTo(result); result.createReader(); } @@ -1563,7 +1591,7 @@ public class Store extends SchemaConfigured implements HeapSize { /** * Find the key that matches row exactly, or the one that immediately - * preceeds it. WARNING: Only use this method on a table where writes occur + * precedes it. WARNING: Only use this method on a table where writes occur * with strictly increasing timestamps. This method assumes this pattern of * writes in order to make it reasonably performant. 
Also our search is * dependent on the axiom that deletes are for cells that are in the container @@ -2056,8 +2084,8 @@ public class Store extends SchemaConfigured implements HeapSize { } public static final long FIXED_OVERHEAD = - ClassSize.align(new SchemaConfigured().heapSize() - + (18 * ClassSize.REFERENCE) + (7 * Bytes.SIZEOF_LONG) + ClassSize.align(SchemaConfigured.SCHEMA_CONFIGURED_UNALIGNED_HEAP_SIZE + + + (19 * ClassSize.REFERENCE) + (7 * Bytes.SIZEOF_LONG) + (5 * Bytes.SIZEOF_INT) + Bytes.SIZEOF_BOOLEAN); public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD diff --git src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index bd72a29..fbabfe2 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.HalfStoreFileReader; import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.Compression; @@ -56,6 +57,8 @@ import org.apache.hadoop.hbase.io.hfile.HFileWriterV1; import org.apache.hadoop.hbase.io.hfile.HFileWriterV2; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.BloomFilterWriter; @@ -130,6 +133,10 @@ public class StoreFile extends SchemaConfigured { /** Key for timestamp of earliest-put in metadata*/ public static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS"); + /** Type of encoding used for data blocks in HFile. Stored in file info. */ + public static final byte[] DATA_BLOCK_ENCODING = + Bytes.toBytes("DATA_BLOCK_ENCODING"); + // Make default block size for StoreFiles 8k while testing. TODO: FIX! // Need to make it 8k for testing. public static final int DEFAULT_BLOCKSIZE_SMALL = 8 * 1024; @@ -148,7 +155,10 @@ public class StoreFile extends SchemaConfigured { // Block cache configuration and reference. private final CacheConfig cacheConf; - // HDFS blocks distribuion information + // What kind of data block encoding will be used + private HFileDataBlockEncoder dataBlockEncoder; + + // HDFS blocks distribution information private HDFSBlocksDistribution hdfsBlocksDistribution; // Keys for metadata stored in backing HFile. @@ -220,17 +230,23 @@ public class StoreFile extends SchemaConfigured { * as the Bloom filter type actually present in the HFile, because * column family configuration might change. If this is * {@link BloomType#NONE}, the existing Bloom filter is ignored. + * @param dataBlockEncoder data block encoding algorithm. * @throws IOException When opening the reader fails. 
*/ StoreFile(final FileSystem fs, final Path p, final Configuration conf, final CacheConfig cacheConf, - final BloomType cfBloomType) + final BloomType cfBloomType, + final HFileDataBlockEncoder dataBlockEncoder) throws IOException { this.fs = fs; this.path = p; this.cacheConf = cacheConf; + if (dataBlockEncoder == null) { + throw new NullPointerException(); + } + this.dataBlockEncoder = dataBlockEncoder; if (isReference(p)) { this.reference = Reference.read(fs, p); this.referencePath = getReferredToFile(this.path); @@ -493,9 +509,11 @@ public class StoreFile extends SchemaConfigured { } if (isReference()) { this.reader = new HalfStoreFileReader(this.fs, this.referencePath, - this.cacheConf, this.reference); + this.cacheConf, this.reference, + dataBlockEncoder.getEncodingInCache()); } else { - this.reader = new Reader(this.fs, this.path, this.cacheConf); + this.reader = new Reader(this.fs, this.path, this.cacheConf, + dataBlockEncoder.getEncodingInCache()); } if (isSchemaConfigured()) { @@ -514,7 +532,7 @@ public class StoreFile extends SchemaConfigured { // By convention, if halfhfile, top half has a sequence number > bottom // half. Thats why we add one in below. Its done for case the two halves // are ever merged back together --rare. Without it, on open of store, - // since store files are distingushed by sequence id, the one half would + // since store files are distinguished by sequence id, the one half would // subsume the other. this.sequenceid = Bytes.toLong(b); if (isReference()) { @@ -598,11 +616,11 @@ public class StoreFile extends SchemaConfigured { } /** - * @param evictOnClose + * @param evictOnClose whether to evict blocks belonging to this file * @throws IOException */ public synchronized void closeReader(boolean evictOnClose) - throws IOException { + throws IOException { if (this.reader != null) { this.reader.close(evictOnClose); this.reader = null; @@ -677,8 +695,8 @@ public class StoreFile extends SchemaConfigured { public static Writer createWriter(final FileSystem fs, final Path dir, final int blocksize, Configuration conf, CacheConfig cacheConf) throws IOException { - return createWriter(fs, dir, blocksize, null, null, conf, cacheConf, - BloomType.NONE, 0); + return createWriter(fs, dir, blocksize, null, NoOpDataBlockEncoder.INSTANCE, + null, conf, cacheConf, BloomType.NONE, 0); } /** @@ -688,8 +706,9 @@ public class StoreFile extends SchemaConfigured { * @param dir Path to family directory. Makes the directory if doesn't exist. * Creates a file with a unique name in this directory. * @param blocksize - * @param algorithm Pass null to get default. - * @param c Pass null to get default. + * @param compressAlgo Compression algorithm. Pass null to get default. + * @param dataBlockEncoder Pass null to disable data block encoding. + * @param comparator Key-value comparator. Pass null to get default. * @param conf HBase system configuration. used with bloom filters * @param cacheConf Cache configuration and reference. 
* @param bloomType column family setting for bloom filters @@ -698,14 +717,11 @@ public class StoreFile extends SchemaConfigured { * @throws IOException */ public static StoreFile.Writer createWriter(final FileSystem fs, - final Path dir, - final int blocksize, - final Compression.Algorithm algorithm, - final KeyValue.KVComparator c, - final Configuration conf, - final CacheConfig cacheConf, - BloomType bloomType, - long maxKeyCount) + final Path dir, final int blocksize, + Compression.Algorithm compressAlgo, + final HFileDataBlockEncoder dataBlockEncoder, + KeyValue.KVComparator comparator, final Configuration conf, + final CacheConfig cacheConf, BloomType bloomType, long maxKeyCount) throws IOException { if (!fs.exists(dir)) { @@ -716,10 +732,14 @@ public class StoreFile extends SchemaConfigured { bloomType = BloomType.NONE; } - return new Writer(fs, path, blocksize, - algorithm == null? HFile.DEFAULT_COMPRESSION_ALGORITHM: algorithm, - conf, cacheConf, c == null ? KeyValue.COMPARATOR: c, bloomType, - maxKeyCount); + if (compressAlgo == null) { + compressAlgo = HFile.DEFAULT_COMPRESSION_ALGORITHM; + } + if (comparator == null) { + comparator = KeyValue.COMPARATOR; + } + return new Writer(fs, path, blocksize, compressAlgo, dataBlockEncoder, + conf, cacheConf, comparator, bloomType, maxKeyCount); } /** @@ -814,6 +834,8 @@ public class StoreFile extends SchemaConfigured { private KeyValue lastDeleteFamilyKV = null; private long deleteFamilyCnt = 0; + protected HFileDataBlockEncoder dataBlockEncoder; + TimeRangeTracker timeRangeTracker = new TimeRangeTracker(); /* isTimeRangeTrackerSet keeps track if the timeRange has already been set * When flushing a memstore, we set TimeRange and use this variable to @@ -838,13 +860,16 @@ public class StoreFile extends SchemaConfigured { * @throws IOException problem writing to FS */ public Writer(FileSystem fs, Path path, int blocksize, - Compression.Algorithm compress, final Configuration conf, + Compression.Algorithm compress, + HFileDataBlockEncoder dataBlockEncoder, final Configuration conf, CacheConfig cacheConf, final KVComparator comparator, BloomType bloomType, long maxKeys) throws IOException { + this.dataBlockEncoder = dataBlockEncoder != null ? + dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; writer = HFile.getWriterFactory(conf, cacheConf).createWriter( fs, path, blocksize, - compress, comparator.getRawComparator()); + compress, this.dataBlockEncoder, comparator.getRawComparator()); this.kvComparator = comparator; @@ -1081,6 +1106,9 @@ public class StoreFile extends SchemaConfigured { } public void close() throws IOException { + // Save data block encoder metadata in the file info. 
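saveMetadata (called just below) records the chosen encoding under the DATA_BLOCK_ENCODING file-info key defined earlier in this class. A hedged sketch of how a reader could recover it, assuming the value is stored as the encoding's name in bytes; the helper is hypothetical, not the patch's actual read path (java.util.Map import assumed):

  static DataBlockEncoding encodingFromFileInfo(StoreFile.Reader reader)
      throws IOException {
    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] encodingName = fileInfo.get(StoreFile.DATA_BLOCK_ENCODING);
    return encodingName == null ? DataBlockEncoding.NONE
        : DataBlockEncoding.valueOf(Bytes.toString(encodingName));
  }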
+ dataBlockEncoder.saveMetadata(this); + boolean hasGeneralBloom = this.closeGeneralBloomFilter(); boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter(); @@ -1119,10 +1147,11 @@ public class StoreFile extends SchemaConfigured { private byte[] lastBloomKey; private long deleteFamilyCnt = -1; - public Reader(FileSystem fs, Path path, CacheConfig cacheConf) - throws IOException { + public Reader(FileSystem fs, Path path, CacheConfig cacheConf, + DataBlockEncoding preferredEncodingInCache) throws IOException { super(path); - reader = HFile.createReader(fs, path, cacheConf); + reader = HFile.createReaderWithEncoding(fs, path, cacheConf, + preferredEncodingInCache); bloomFilterType = BloomType.NONE; } @@ -1262,7 +1291,7 @@ public class StoreFile extends SchemaConfigured { default: return true; - } + } } public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset, @@ -1312,7 +1341,7 @@ public class StoreFile extends SchemaConfigured { return true; byte[] key; - switch (bloomFilterType) { + switch (bloomFilterType) { case ROW: if (col != null) { throw new RuntimeException("Row-only Bloom filter called with " + diff --git src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index 9fa4b34..49f8e81 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -154,8 +154,8 @@ class StoreFileScanner implements KeyValueScanner { } finally { realSeekDone = true; } - } catch(IOException ioe) { - throw new IOException("Could not seek " + this, ioe); + } catch (IOException ioe) { + throw new IOException("Could not seek " + this + " to key " + key, ioe); } } @@ -175,7 +175,8 @@ class StoreFileScanner implements KeyValueScanner { realSeekDone = true; } } catch (IOException ioe) { - throw new IOException("Could not seek " + this, ioe); + throw new IOException("Could not reseek " + this + " to key " + key, + ioe); } } diff --git src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java index ae45a26..4821b53 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java @@ -83,6 +83,15 @@ public class SchemaConfigured implements HeapSize, SchemaAware { } /** + * Creates an instance corresponding to an unknown table and column family. + * Used in unit tests. + */ + public static SchemaConfigured createUnknown() { + return new SchemaConfigured(null, SchemaMetrics.UNKNOWN, + SchemaMetrics.UNKNOWN); + } + + /** * Default constructor. Only use when column/family name are not known at * construction (i.e. for HFile blocks). */ diff --git src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java new file mode 100644 index 0000000..4e03141 --- /dev/null +++ src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.io.encoding. + EncoderBufferTooSmallException; +import org.apache.hadoop.io.WritableUtils; + +/** + * Utility functions for working with byte buffers, such as reading/writing + * variable-length long numbers. + */ +public final class ByteBufferUtils { + + // "Compressed integer" serialization helper constants. + private final static int VALUE_MASK = 0x7f; + private final static int NEXT_BIT_SHIFT = 7; + private final static int NEXT_BIT_MASK = 1 << 7; + + private ByteBufferUtils() { + } + + /** + * Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)}, + * but writes to a {@link ByteBuffer}. + */ + public static void writeVLong(ByteBuffer out, long i) { + if (i >= -112 && i <= 127) { + out.put((byte) i); + return; + } + + int len = -112; + if (i < 0) { + i ^= -1L; // take one's complement + len = -120; + } + + long tmp = i; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + out.put((byte) len); + + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (int idx = len; idx != 0; idx--) { + int shiftbits = (idx - 1) * 8; + long mask = 0xFFL << shiftbits; + out.put((byte) ((i & mask) >> shiftbits)); + } + } + + /** + * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a + * {@link ByteBuffer}. + */ + public static long readVLong(ByteBuffer in) { + byte firstByte = in.get(); + int len = WritableUtils.decodeVIntSize(firstByte); + if (len == 1) { + return firstByte; + } + long i = 0; + for (int idx = 0; idx < len-1; idx++) { + byte b = in.get(); + i = i << 8; + i = i | (b & 0xFF); + } + return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); + } + + + /** + * Put in buffer integer using 7 bit encoding. For each written byte: + * 7 bits are used to store value + * 1 bit is used to indicate whether there is next bit. + * @param value Int to be compressed. + * @param out Where to put compressed data + * @return Number of bytes written. + * @throws IOException on stream error + */ + public static int putCompressedInt(OutputStream out, final int value) + throws IOException { + int i = 0; + int tmpvalue = value; + do { + byte b = (byte) (tmpvalue & VALUE_MASK); + tmpvalue >>>= NEXT_BIT_SHIFT; + if (tmpvalue != 0) { + b |= (byte) NEXT_BIT_MASK; + } + out.write(b); + i++; + } while (tmpvalue != 0); + return i; + } + + /** + * Put in output stream 32 bit integer (Big Endian byte order). + * @param out Where to put integer. + * @param value Value of integer. + * @throws IOException On stream error. + */ + public static void putInt(OutputStream out, final int value) + throws IOException { + for (int i = Bytes.SIZEOF_INT - 1 ; i >= 0 ; --i) { + out.write((byte) (value >>> (i * 8))); + } + } + + /** + * Copy byte to the output stream. + * @param b byte to be copied + * @param out Where to put compressed data + * @return Number of written bytes. 
+ * @throws IOException on stream error + */ + public static int copyToStream(OutputStream out, byte b) + throws IOException { + out.write(b); + return Bytes.SIZEOF_BYTE; + } + + /** + * Copy the data to the output stream and update position in buffer. + * @param out Write bytes here. + * @param buffer Source buffer in certain position. + * @param length Length of copy. + * @return Number of written bytes. + * @throws IOException on stream error + */ + public static int copyToStream(OutputStream out, ByteBuffer buffer, + int length) throws IOException { + if (buffer.hasArray()) { + out.write(buffer.array(), buffer.arrayOffset() + buffer.position(), + length); + skip(buffer, length); + } else { + for (int i = 0 ; i < length ; ++i) { + out.write(buffer.get()); + } + } + return length; + } + + /** + * Copy the data to the output stream + * @param out Write bytes here. + * @param in Source buffer in certain position. + * @param offset In the buffer from position 0. + * @param length Length of copy. + * @return Number of written bytes. + * @throws IOException on stream error + */ + public static int copyToStream(OutputStream out, ByteBuffer in, + int offset, int length) throws IOException { + if (in.hasArray()) { + out.write(in.array(), in.arrayOffset() + offset, + length); + } else { + for (int i = 0 ; i < length ; ++i) { + out.write(in.get(offset + i)); + } + } + return length; + } + + public static int putLong(OutputStream out, final long length, + final int fitInBytes) throws IOException { + long tmpLength = length; + for (int i = 0 ; i < fitInBytes ; ++i) { + out.write((byte) (tmpLength & 0xff)); + tmpLength >>>= 8; + } + return fitInBytes; + } + + /** + * Check how many bytes are required to store value. + * @param value Value which size will be tested. + * @return How many bytes are required to store value. + */ + public static int longFitsIn(final long value) { + if (value < 0) { + return 8; + } + + if (value < (1l << 4 * 8)) { + // no more than 4 bytes + if (value < (1l << 2 * 8)) { + if (value < (1l << 1 * 8)) { + return 1; + } + return 2; + } + if (value < (1l << 3 * 8)) { + return 3; + } + return 4; + } + // more than 4 bytes + if (value < (1l << 6 * 8)) { + if (value < (1l << 5 * 8)) { + return 5; + } + return 6; + } + if (value < (1l << 7 * 8)) { + return 7; + } + return 8; + } + + /** + * Check how many bytes is required to store value. + * @param value Value which size will be tested. + * @return How many bytes are required to store value. + */ + public static int intFitsIn(final int value) { + if (value < 0) { + return 4; + } + + if (value < (1 << 2 * 8)) { + if (value < (1 << 1 * 8)) { + return 1; + } + return 2; + } + if (value <= (1 << 3 * 8)) { + return 3; + } + return 4; + } + + /** + * Read integer from stream coded in 7 bits and increment position. + * @return Read integer. + * @throws IOException + */ + public static int readCompressedInt(InputStream input) + throws IOException { + int result = 0; + int i = 0; + byte b; + do { + b = (byte) input.read(); + result += (b & VALUE_MASK) << (NEXT_BIT_SHIFT * i); + i++; + if (i > Bytes.SIZEOF_INT + 1) { + throw new IllegalStateException( + "Corrupted compressed int (too long: " + (i + 1) + " bytes)"); + } + } while (0 != (b & NEXT_BIT_MASK)); + return result; + } + + /** + * Read integer from buffer coded in 7 bits and increment position. + * @return Read integer. 
+ */ + public static int readCompressedInt(ByteBuffer buffer) { + byte b = buffer.get(); + if ((b & NEXT_BIT_MASK) != 0) { + return (b & VALUE_MASK) + (readCompressedInt(buffer) << NEXT_BIT_SHIFT); + } + return b & VALUE_MASK; + } + + /** + * Read long which was written to fitInBytes bytes and increment position. + * @param fitInBytes In how many bytes given long is stored. + * @return The value of parsed long. + * @throws IOException + */ + public static long readLong(InputStream input, final int fitInBytes) + throws IOException { + long tmpLong = 0; + for (int i = 0 ; i < fitInBytes ; ++i) { + tmpLong |= (input.read() & 0xffl) << (8 * i); + } + return tmpLong; + } + + /** + * Read long which was written to fitInBytes bytes and increment position. + * @param fitInBytes In how many bytes given long is stored. + * @return The value of parsed long. + */ + public static long readLong(ByteBuffer buffer, final int fitInBytes) { + long tmpLength = 0; + for (int i = 0 ; i < fitInBytes ; ++i) { + tmpLength |= (buffer.get() & 0xffl) << (8l * i); + } + return tmpLength; + } + + /** + * Asserts that we have 'length' bytes remaining in 'buffer'. + * @param buffer Where are we looking for remaining bytes. + * @param length How many bytes do we need. + * @throws EncoderBufferTooSmallException If there are no enough bytes. + */ + public static void ensureSpace(ByteBuffer buffer, int length) + throws EncoderBufferTooSmallException { + if (buffer.position() + length > buffer.limit()) { + throw new EncoderBufferTooSmallException( + "Buffer position=" + buffer.position() + + ", buffer limit=" + buffer.limit() + + ", length to be written=" + length); + } + } + + /** + * Copy 'length' bytes from 'source' and put it at the current position of + * 'buffer'. Update position in 'buffer' afterwards. + * @param source From where data should be read. + * @param buffer Write data here. + * @param length Read that many bytes. + * @throws IOException If there is problem in source. + */ + public static void copyFromStream(DataInputStream source, + ByteBuffer buffer, int length) throws IOException { + if (buffer.hasArray()) { + source.readFully(buffer.array(), buffer.position() + buffer.arrayOffset(), + length); + skip(buffer, length); + } else { + for (int i = 0 ; i < length ; ++i) { + buffer.put(source.readByte()); + } + } + } + + /** + * Copy from one buffer to another from given offset + * @param source From where copy. + * @param destination Where to copy. + * @param sourceOffset Offset in the source buffer + * @param length How many bytes will be copied. + * @throws IOException + */ + public static void copyFromBuffer(ByteBuffer source, + ByteBuffer destination, int sourceOffset, int length) { + if (source.hasArray() && destination.hasArray()) { + System.arraycopy(source.array(), sourceOffset + source.arrayOffset(), + destination.array(), destination.position() + + destination.arrayOffset(), length); + skip(destination, length); + } else { + for (int i = 0 ; i < length ; ++i) { + destination.put(source.get(sourceOffset + i)); + } + } + } + + /** + * Find length of common prefix of two parts in the buffer + * @param buffer Where parts are located. + * @param offsetLeft Offset of the first part. + * @param offsetRight Offset of the second part. + * @param limit Maximal length of common prefix. + * @return Length of prefix. 
+ */ + public static int findCommonPrefix(ByteBuffer buffer, int offsetLeft, + int offsetRight, int limit) { + int prefix = 0; + + for (; prefix < limit ; ++prefix) { + if (buffer.get(offsetLeft + prefix) != buffer.get(offsetRight + prefix)) { + break; + } + } + + return prefix; + } + + /** + * Find length of common prefix in two arrays. + * @param left Array to be compared. + * @param leftOffset Offset in left array. + * @param leftLength Length of left array. + * @param right Array to be compared. + * @param rightOffset Offset in right array. + * @param rightLength Length of right array. + * @return Length of the common prefix. + */ + public static int findCommonPrefix( + byte[] left, int leftOffset, int leftLength, + byte[] right, int rightOffset, int rightLength) { + int length = Math.min(leftLength, rightLength); + int result = 0; + + while (result < length && + left[leftOffset + result] == right[rightOffset + result]) { + result++; + } + + return result; + } + + /** + * Check whether two parts in the same buffer are equal. + * @param buffer Buffer containing both parts. + * @param offsetLeft Beginning of first part. + * @param lengthLeft Length of the first part. + * @param offsetRight Beginning of the second part. + * @param lengthRight Length of the second part. + * @return True if the two parts are byte-for-byte equal. + */ + public static boolean arePartsEqual(ByteBuffer buffer, + int offsetLeft, int lengthLeft, + int offsetRight, int lengthRight) { + if (lengthLeft != lengthRight) { + return false; + } + + if (buffer.hasArray()) { + return 0 == Bytes.compareTo( + buffer.array(), buffer.arrayOffset() + offsetLeft, lengthLeft, + buffer.array(), buffer.arrayOffset() + offsetRight, lengthRight); + } + + for (int i = 0 ; i < lengthRight ; ++i) { + if (buffer.get(offsetLeft + i) != buffer.get(offsetRight + i)) { + return false; + } + } + return true; + } + + /** + * Increment position in buffer. + * @param buffer In this buffer. + * @param length By that many bytes. + */ + public static void skip(ByteBuffer buffer, int length) { + buffer.position(buffer.position() + length); + } + + /** + * Read int, assuming it is stored in N bytes with no special encoding. + * @param source From where read bytes. + * @param intLength Length of the integer in bytes. + * @return The value of the integer. + * @throws IOException On IO error. + */ + public static int readCompressedInt(InputStream source, int intLength) + throws IOException { + int result = 0; + for (int i = 0 ; i < intLength ; ++i) { + result = (result << 8) + (source.read() & 0xff); + } + return result; + } + + /** + * Read int, assuming it is stored in N bytes with no special encoding. + * @param buffer Read bytes from this buffer. + * @param intLength The length of the integer in bytes. + * @return The value of the integer.
+ */ + public static int readCompressedInt(ByteBuffer buffer, int intLength) { + int result = 0; + for (int i = 0 ; i < intLength ; ++i) { + result = (result << 8) + (buffer.get() & 0xff); + } + return result; + } + +} diff --git src/main/ruby/hbase/admin.rb src/main/ruby/hbase/admin.rb index 72c8259..601e7e1 100644 --- src/main/ruby/hbase/admin.rb +++ src/main/ruby/hbase/admin.rb @@ -532,6 +532,8 @@ module Hbase family.setInMemory(JBoolean.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::IN_MEMORY])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::IN_MEMORY) family.setTimeToLive(JInteger.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::TTL])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::TTL) family.setCompressionType(org.apache.hadoop.hbase.io.hfile.Compression::Algorithm.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::COMPRESSION])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::COMPRESSION) + family.setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::DATA_BLOCK_ENCODING])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::DATA_BLOCK_ENCODING) + family.setEncodeOnDisk(JBoolean.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::ENCODE_ON_DISK])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::ENCODE_ON_DISK) family.setBlocksize(JInteger.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::BLOCKSIZE])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::BLOCKSIZE) family.setMaxVersions(JInteger.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::VERSIONS])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::VERSIONS) family.setMinVersions(JInteger.valueOf(arg[org.apache.hadoop.hbase.HColumnDescriptor::MIN_VERSIONS])) if arg.include?(org.apache.hadoop.hbase.HColumnDescriptor::MIN_VERSIONS) diff --git src/test/java/org/apache/hadoop/hbase/BROKE_TODO_FIX_TestAcidGuarantees.java src/test/java/org/apache/hadoop/hbase/BROKE_TODO_FIX_TestAcidGuarantees.java index 6741acc..efe3edd 100644 --- src/test/java/org/apache/hadoop/hbase/BROKE_TODO_FIX_TestAcidGuarantees.java +++ src/test/java/org/apache/hadoop/hbase/BROKE_TODO_FIX_TestAcidGuarantees.java @@ -74,7 +74,7 @@ public class BROKE_TODO_FIX_TestAcidGuarantees { public BROKE_TODO_FIX_TestAcidGuarantees() { // Set small flush size for minicluster so we exercise reseeking scanners Configuration conf = HBaseConfiguration.create(); - conf.set("hbase.hregion.memstore.flush.size", String.valueOf(128*1024)); + conf.set(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, String.valueOf(128*1024)); util = new HBaseTestingUtility(conf); } diff --git src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java index 0b1d258..e3ddc29 100644 --- src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java +++ src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java @@ -221,18 +221,30 @@ public abstract class HBaseTestCase extends TestCase { final int minVersions, final int versions, final int ttl, boolean keepDeleted) { HTableDescriptor htd = new HTableDescriptor(name); htd.addFamily(new HColumnDescriptor(fam1, minVersions, versions, - keepDeleted, HColumnDescriptor.DEFAULT_COMPRESSION, false, false, - HColumnDescriptor.DEFAULT_BLOCKSIZE, ttl, - HColumnDescriptor.DEFAULT_BLOOMFILTER, - HConstants.REPLICATION_SCOPE_LOCAL)); + keepDeleted, + HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + 
HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, + false, false, + HColumnDescriptor.DEFAULT_BLOCKSIZE, ttl, + HColumnDescriptor.DEFAULT_BLOOMFILTER, + HConstants.REPLICATION_SCOPE_LOCAL)); htd.addFamily(new HColumnDescriptor(fam2, minVersions, versions, - keepDeleted, HColumnDescriptor.DEFAULT_COMPRESSION, false, false, + keepDeleted, + HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, + false, false, HColumnDescriptor.DEFAULT_BLOCKSIZE, ttl, HColumnDescriptor.DEFAULT_BLOOMFILTER, HConstants.REPLICATION_SCOPE_LOCAL)); htd.addFamily(new HColumnDescriptor(fam3, minVersions, versions, - keepDeleted, HColumnDescriptor.DEFAULT_COMPRESSION, false, false, - HColumnDescriptor.DEFAULT_BLOCKSIZE, ttl, + keepDeleted, + HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, + false, false, + HColumnDescriptor.DEFAULT_BLOCKSIZE, ttl, HColumnDescriptor.DEFAULT_BLOOMFILTER, HConstants.REPLICATION_SCOPE_LOCAL)); return htd; diff --git src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index b2a5634..825b371 100644 --- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.master.HMaster; @@ -1879,9 +1880,12 @@ public class HBaseTestingUtility { * @return the number of regions the table was split into */ public static int createPreSplitLoadTestTable(Configuration conf, - byte[] tableName, byte[] columnFamily) throws IOException { + byte[] tableName, byte[] columnFamily, + DataBlockEncoding dataBlockEncoding) throws IOException { HTableDescriptor desc = new HTableDescriptor(tableName); - desc.addFamily(new HColumnDescriptor(columnFamily)); + HColumnDescriptor hcd = new HColumnDescriptor(columnFamily); + hcd.setDataBlockEncoding(dataBlockEncoding); + desc.addFamily(hcd); int totalNumberOfRegions = 0; try { @@ -1924,7 +1928,8 @@ public class HBaseTestingUtility { public HRegion createTestRegion(String tableName, String cfName, Compression.Algorithm comprAlgo, BloomType bloomType, int maxVersions, - boolean blockCacheEnabled, int blockSize) throws IOException { + boolean blockCacheEnabled, int blockSize, DataBlockEncoding encoding) + throws IOException { HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cfName), maxVersions, comprAlgo.getName(), @@ -1933,6 +1938,7 @@ public class HBaseTestingUtility { HColumnDescriptor.DEFAULT_TTL, bloomType.toString()); hcd.setBlocksize(HFile.DEFAULT_BLOCKSIZE); + hcd.setDataBlockEncoding(encoding); HTableDescriptor htd = new HTableDescriptor(tableName); htd.addFamily(hcd); HRegionInfo info = diff --git src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java index a9ef57a..4099f40 100644 --- src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java +++ src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java @@ -191,7 +191,8 @@ public class HFilePerformanceEvaluation 
{ void setUp() throws Exception { writer = HFile.getWriterFactoryNoCache(conf).createWriter(this.fs, - this.mf, RFILE_BLOCKSIZE, (Compression.Algorithm) null, null); + this.mf, RFILE_BLOCKSIZE, (Compression.Algorithm) null, null, + null); } @Override @@ -365,4 +366,4 @@ public class HFilePerformanceEvaluation { public static void main(String[] args) throws Exception { new HFilePerformanceEvaluation().runBenchmarks(); } -} \ No newline at end of file +} diff --git src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java index 2222d2e..4643e6a 100644 --- src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java +++ src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java @@ -76,7 +76,7 @@ public class TestAcidGuarantees { public TestAcidGuarantees() { // Set small flush size for minicluster so we exercise reseeking scanners Configuration conf = HBaseConfiguration.create(); - conf.set("hbase.hregion.memstore.flush.size", String.valueOf(128*1024)); + conf.set(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, String.valueOf(128*1024)); util = new HBaseTestingUtility(conf); } diff --git src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java index bb077d0..ae293a2 100644 --- src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java +++ src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java @@ -1379,7 +1379,7 @@ public class TestAdmin { "hbase.hregion.memstore.optionalflushcount", 2); // We flush the cache after every 8192 bytes - TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.flush.size", + TEST_UTIL.getConfiguration().setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192); // Increase the amount of time between client retries diff --git src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java index ab80020..30499c9 100644 --- src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java +++ src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java @@ -143,6 +143,8 @@ public class TestFromClientSide { HColumnDescriptor.DEFAULT_VERSIONS, true, HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, HColumnDescriptor.DEFAULT_IN_MEMORY, HColumnDescriptor.DEFAULT_BLOCKCACHE, HColumnDescriptor.DEFAULT_BLOCKSIZE, diff --git src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java index e3061d0..acc13d0 100644 --- src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java +++ src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.util.Bytes; @@ -97,8 +98,8 @@ public class TestHalfStoreFileReader { private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf) throws IOException { - final HalfStoreFileReader halfreader = - new HalfStoreFileReader(fs, p, cacheConf, bottom); + final HalfStoreFileReader halfreader = new HalfStoreFileReader(fs, p, + cacheConf, bottom, 
DataBlockEncoding.NONE); halfreader.loadFileInfo(); final HFileScanner scanner = halfreader.getScanner(false, false); diff --git src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java index 8b7d016..a3c9ae9 100644 --- src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java +++ src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java @@ -40,6 +40,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; import org.apache.hadoop.hbase.io.hfile.CachedBlock; import org.apache.hadoop.hbase.io.hfile.LruBlockCache; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -295,6 +296,12 @@ public class TestHeapSize extends TestCase { assertEquals(expected, actual); } + // SchemaConfigured + LOG.debug("Heap size for: " + SchemaConfigured.class.getName()); + SchemaConfigured sc = new SchemaConfigured(null, "myTable", "myCF"); + assertEquals(ClassSize.estimateBase(SchemaConfigured.class, true), + sc.heapSize()); + // Store Overhead cl = Store.class; actual = Store.FIXED_OVERHEAD; @@ -313,16 +320,23 @@ public class TestHeapSize extends TestCase { assertEquals(expected, actual); } + // Block cache key overhead + cl = BlockCacheKey.class; + // Passing zero length file name, because estimateBase does not handle + // deep overhead. + actual = new BlockCacheKey("", 0).heapSize(); + expected = ClassSize.estimateBase(cl, false); + if (expected != actual) { + ClassSize.estimateBase(cl, true); + assertEquals(expected, actual); + } + // Currently NOT testing Deep Overheads of many of these classes. // Deep overheads cover a vast majority of stuff, but will not be 100% // accurate because it's unclear when we're referencing stuff that's already // accounted for. But we have satisfied our two core requirements. // Sizing is quite accurate now, and our tests will throw errors if // any of these classes are modified without updating overhead sizes. - - SchemaConfigured sc = new SchemaConfigured(null, "myTable", "myCF"); - assertEquals(ClassSize.estimateBase(SchemaConfigured.class, true), - sc.heapSize()); } @org.junit.Rule diff --git src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java new file mode 100644 index 0000000..b2fe516 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.io.encoding; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.io.WritableUtils; + +/** + * Generate list of key values which are very useful to test data block encoding + * and compression. + */ +public class RedundantKVGenerator { + // row settings + static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10; + static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6; + static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3; + static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3; + static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3; + static int DEFAULT_NUMBER_OF_ROW = 500; + + // qualifier + static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f; + static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f; + static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9; + static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3; + + static int DEFAULT_COLUMN_FAMILY_LENGTH = 9; + static int DEFAULT_VALUE_LENGTH = 8; + static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f; + + static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000; + static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000; + + /** + * Default constructor, assumes all parameters from class constants. + */ + public RedundantKVGenerator() { + this(new Random(42L), + DEFAULT_NUMBER_OF_ROW_PREFIXES, + DEFAULT_AVERAGE_PREFIX_LENGTH, + DEFAULT_PREFIX_LENGTH_VARIANCE, + DEFAULT_AVERAGE_SUFFIX_LENGTH, + DEFAULT_SUFFIX_LENGTH_VARIANCE, + DEFAULT_NUMBER_OF_ROW, + + DEFAULT_CHANCE_FOR_SAME_QUALIFIER, + DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER, + DEFAULT_AVERAGE_QUALIFIER_LENGTH, + DEFAULT_QUALIFIER_LENGTH_VARIANCE, + + DEFAULT_COLUMN_FAMILY_LENGTH, + DEFAULT_VALUE_LENGTH, + DEFAULT_CHANCE_FOR_ZERO_VALUE, + + DEFAULT_BASE_TIMESTAMP_DIVIDE, + DEFAULT_TIMESTAMP_DIFF_SIZE + ); + } + + + /** + * Various configuration options for generating key values + * @param randomizer pick things by random + */ + public RedundantKVGenerator(Random randomizer, + int numberOfRowPrefixes, + int averagePrefixLength, + int prefixLengthVariance, + int averageSuffixLength, + int suffixLengthVariance, + int numberOfRows, + + float chanceForSameQualifier, + float chanceForSimiliarQualifier, + int averageQualifierLength, + int qualifierLengthVariance, + + int columnFamilyLength, + int valueLength, + float chanceForZeroValue, + + int baseTimestampDivide, + int timestampDiffSize + ) { + this.randomizer = randomizer; + + this.numberOfRowPrefixes = numberOfRowPrefixes; + this.averagePrefixLength = averagePrefixLength; + this.prefixLengthVariance = prefixLengthVariance; + this.averageSuffixLength = averageSuffixLength; + this.suffixLengthVariance = suffixLengthVariance; + this.numberOfRows = numberOfRows; + + this.chanceForSameQualifier = chanceForSameQualifier; + this.chanceForSimiliarQualifier = chanceForSimiliarQualifier; + this.averageQualifierLength = averageQualifierLength; + this.qualifierLengthVariance = qualifierLengthVariance; + + this.columnFamilyLength = columnFamilyLength; + this.valueLength = valueLength; + this.chanceForZeroValue = chanceForZeroValue; + + this.baseTimestampDivide = baseTimestampDivide; + this.timestampDiffSize = timestampDiffSize; + } + + /** Used to generate dataset */ + private Random randomizer; + + // row settings + private int numberOfRowPrefixes; + private int averagePrefixLength = 6; + private int 
prefixLengthVariance = 3; + private int averageSuffixLength = 3; + private int suffixLengthVariance = 3; + private int numberOfRows = 500; + + // qualifier + private float chanceForSameQualifier = 0.5f; + private float chanceForSimiliarQualifier = 0.4f; + private int averageQualifierLength = 9; + private int qualifierLengthVariance = 3; + + private int columnFamilyLength = 9; + private int valueLength = 8; + private float chanceForZeroValue = 0.5f; + + private int baseTimestampDivide = 1000000; + private int timestampDiffSize = 100000000; + + private List generateRows() { + // generate prefixes + List prefixes = new ArrayList(); + prefixes.add(new byte[0]); + for (int i = 1 ; i < numberOfRowPrefixes ; ++i) { + int prefixLength = averagePrefixLength; + prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) - + prefixLengthVariance; + byte[] newPrefix = new byte[prefixLength]; + randomizer.nextBytes(newPrefix); + prefixes.add(newPrefix); + } + + // generate rest of the row + List rows = new ArrayList(); + for (int i = 0 ; i < numberOfRows ; ++i) { + int suffixLength = averageSuffixLength; + suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) - + suffixLengthVariance; + int randomPrefix = randomizer.nextInt(prefixes.size()); + byte[] row = new byte[prefixes.get(randomPrefix).length + + suffixLength]; + rows.add(row); + } + + return rows; + } + + /** + * Generate test data useful to test encoders. + * @param howMany How many Key values should be generated. + * @return sorted list of key values + */ + public List generateTestKeyValues(int howMany) { + List result = new ArrayList(); + + List rows = generateRows(); + Map> rowsToQualifier = + new HashMap>(); + + byte[] family = new byte[columnFamilyLength]; + randomizer.nextBytes(family); + + long baseTimestamp = Math.abs(randomizer.nextLong()) / + baseTimestampDivide; + + byte[] value = new byte[valueLength]; + + for (int i = 0 ; i < howMany ; ++i) { + long timestamp = baseTimestamp + randomizer.nextInt( + timestampDiffSize); + Integer rowId = randomizer.nextInt(rows.size()); + byte[] row = rows.get(rowId); + + // generate qualifier, sometimes it is same, sometimes similar, + // occasionally completely different + byte[] qualifier; + float qualifierChance = randomizer.nextFloat(); + if (!rowsToQualifier.containsKey(rowId) || + qualifierChance > chanceForSameQualifier + + chanceForSimiliarQualifier) { + int qualifierLength = averageQualifierLength; + qualifierLength += + randomizer.nextInt(2 * qualifierLengthVariance + 1) - + qualifierLengthVariance; + qualifier = new byte[qualifierLength]; + randomizer.nextBytes(qualifier); + + // add it to map + if (!rowsToQualifier.containsKey(rowId)) { + rowsToQualifier.put(rowId, new ArrayList()); + } + rowsToQualifier.get(rowId).add(qualifier); + } else if (qualifierChance > chanceForSameQualifier) { + // similar qualifier + List previousQualifiers = rowsToQualifier.get(rowId); + byte[] originalQualifier = previousQualifiers.get( + randomizer.nextInt(previousQualifiers.size())); + + qualifier = new byte[originalQualifier.length]; + int commonPrefix = randomizer.nextInt(qualifier.length); + System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix); + for (int j = commonPrefix ; j < qualifier.length ; ++j) { + qualifier[j] = (byte) (randomizer.nextInt() & 0xff); + } + + rowsToQualifier.get(rowId).add(qualifier); + } else { + // same qualifier + List previousQualifiers = rowsToQualifier.get(rowId); + qualifier = previousQualifiers.get( + 
randomizer.nextInt(previousQualifiers.size())); + } + + if (randomizer.nextFloat() < chanceForZeroValue) { + for (int j = 0 ; j < value.length ; ++j) { + value[j] = (byte) 0; + } + } else { + randomizer.nextBytes(value); + } + + result.add(new KeyValue(row, family, qualifier, timestamp, value)); + } + + Collections.sort(result, KeyValue.COMPARATOR); + + return result; + } + + /** + * Convert list of KeyValues to byte buffer. + * @param keyValues list of KeyValues to be converted. + * @return buffer with content from key values + */ + public static ByteBuffer convertKvToByteBuffer(List keyValues, + boolean includesMemstoreTS) { + int totalSize = 0; + for (KeyValue kv : keyValues) { + totalSize += kv.getLength(); + if (includesMemstoreTS) { + totalSize += WritableUtils.getVIntSize(kv.getMemstoreTS()); + } + } + + ByteBuffer result = ByteBuffer.allocate(totalSize); + for (KeyValue kv : keyValues) { + result.put(kv.getBuffer(), kv.getOffset(), kv.getLength()); + if (includesMemstoreTS) { + ByteBufferUtils.writeVLong(result, kv.getMemstoreTS()); + } + } + + return result; + } + +} \ No newline at end of file diff --git src/test/java/org/apache/hadoop/hbase/io/encoding/TestBufferedDataBlockEncoder.java src/test/java/org/apache/hadoop/hbase/io/encoding/TestBufferedDataBlockEncoder.java new file mode 100644 index 0000000..2e7de2f --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/encoding/TestBufferedDataBlockEncoder.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestBufferedDataBlockEncoder { + + @Test + public void testEnsureSpaceForKey() { + BufferedDataBlockEncoder.SeekerState state = + new BufferedDataBlockEncoder.SeekerState(); + for (int i = 1; i <= 65536; ++i) { + state.keyLength = i; + state.ensureSpaceForKey(); + state.keyBuffer[state.keyLength - 1] = (byte) ((i - 1) % 0xff); + for (int j = 0; j < i - 1; ++j) { + // Check that earlier bytes were preserved as the buffer grew. + assertEquals((byte) (j % 0xff), state.keyBuffer[j]); + } + } + } + +} diff --git src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java new file mode 100644 index 0000000..bb9eaae --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
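// Editor's sketch (not part of the patch): typical usage of the RedundantKVGenerator test helper
// defined above -- generate a block of redundant KeyValues and flatten it into the ByteBuffer
// layout that the encoder tests feed to DataBlockEncoder implementations. The count of 1000 is an
// arbitrary example value.
import java.nio.ByteBuffer;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator;

public class RedundantKVGeneratorUsage {
  public static ByteBuffer sampleBlock(boolean includesMemstoreTS) {
    RedundantKVGenerator generator = new RedundantKVGenerator();
    // KeyValues with repeated rows/qualifiers, sorted by KeyValue.COMPARATOR.
    List<KeyValue> kvs = generator.generateTestKeyValues(1000);
    // Concatenated key/value bytes, optionally followed by a vlong memstore timestamp per cell.
    return RedundantKVGenerator.convertKvToByteBuffer(kvs, includesMemstoreTS);
  }
}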
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests changing data block encoding settings of a column family. + */ +@Category(LargeTests.class) +public class TestChangingEncoding { + + private static final Log LOG = LogFactory.getLog(TestChangingEncoding.class); + + private static final String CF = "ChangeEncCF"; + private static final byte[] CF_BYTES = Bytes.toBytes(CF); + private static final int NUM_ROWS_PER_BATCH = 100; + private static final int NUM_COLS_PER_ROW = 20; + + private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final Configuration conf = TEST_UTIL.getConfiguration(); + + private HBaseAdmin admin; + private HColumnDescriptor hcd; + + private String tableName; + + /** A zero-based index of the current batch of test data being written */ + private int numBatchesWritten; + + private void prepareTest(String testId) throws IOException { + tableName = "test_table_" + testId; + HTableDescriptor htd = new HTableDescriptor(tableName); + hcd = new HColumnDescriptor(CF); + htd.addFamily(hcd); + admin.createTable(htd); + numBatchesWritten = 0; + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + // Use a small flush size to create more HFiles. 
+ conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024); + TEST_UTIL.startMiniCluster(); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setUp() throws Exception { + admin = new HBaseAdmin(conf); + } + + @After + public void tearDown() throws IOException { + admin.close(); + } + + private static byte[] getRowKey(int batchId, int i) { + return Bytes.toBytes("batch" + batchId + "_row" + i); + } + + private static byte[] getQualifier(int j) { + return Bytes.toBytes("col" + j); + } + + private static byte[] getValue(int batchId, int i, int j) { + return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + + "_col" + j); + } + + private void writeTestData(int batchId) throws Exception { + LOG.debug("Writing test data batch " + batchId); + HTable table = new HTable(conf, tableName); + for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { + Put put = new Put(getRowKey(batchId, i)); + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + put.add(CF_BYTES, getQualifier(j), + getValue(batchId, i, j)); + table.put(put); + } + } + table.close(); + } + + private void verifyTestData(int batchId) throws Exception { + LOG.debug("Verifying test data batch " + batchId); + HTable table = new HTable(conf, tableName); + for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { + Get get = new Get(getRowKey(batchId, i)); + Result result = table.get(get); + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + KeyValue kv = result.getColumnLatest(CF_BYTES, getQualifier(j)); + assertEquals(Bytes.toStringBinary(getValue(batchId, i, j)), + Bytes.toStringBinary(kv.getValue())); + } + } + table.close(); + } + + private void writeSomeNewDataAndVerifyAllData() throws Exception { + writeTestData(numBatchesWritten); + ++numBatchesWritten; + for (int i = 0; i < numBatchesWritten; ++i) { + verifyTestData(i); + } + } + + private void setEncodingConf(DataBlockEncoding encoding, + boolean encodeOnDisk) throws IOException { + LOG.debug("Setting CF encoding to " + encoding + " (ordinal=" + + encoding.ordinal() + "), encodeOnDisk=" + encodeOnDisk); + admin.disableTable(tableName); + hcd.setDataBlockEncoding(encoding); + hcd.setEncodeOnDisk(encodeOnDisk); + admin.modifyColumn(tableName, hcd); + admin.enableTable(tableName); + } + + @Test + public void testChangingEncoding() throws Exception { + prepareTest("ChangingEncoding"); + List encodings = + new ArrayList( + Arrays.asList(DataBlockEncoding.values())); + encodings.add(DataBlockEncoding.NONE); + for (boolean encodeOnDisk : new boolean[]{false, true}) { + for (DataBlockEncoding encoding : encodings) { + setEncodingConf(encoding, encodeOnDisk); + writeSomeNewDataAndVerifyAllData(); + } + } + } + + @Test + public void testFlippingEncodeOnDisk() throws Exception { + prepareTest("FlippingEncodeOnDisk"); + // The focus of this test case is flipping the "encoding on disk" flag, + // so we only try a couple of encodings. + DataBlockEncoding[] encodings = new DataBlockEncoding[] { + DataBlockEncoding.NONE, DataBlockEncoding.FAST_DIFF }; + for (DataBlockEncoding encoding : encodings) { + boolean[] flagValues; + if (encoding == DataBlockEncoding.NONE) { + // encodeOnDisk does not matter when not using encoding. 
+ flagValues = + new boolean[] { HColumnDescriptor.DEFAULT_ENCODE_ON_DISK }; + } else { + flagValues = new boolean[] { false, true, false, true }; + } + for (boolean encodeOnDisk : flagValues) { + setEncodingConf(encoding, encodeOnDisk); + writeSomeNewDataAndVerifyAllData(); + } + } + } + + @Test + public void testCrazyRandomChanges() throws Exception { + prepareTest("RandomChanges"); + Random rand = new Random(2934298742974297L); + for (int i = 0; i < 20; ++i) { + int encodingOrdinal = rand.nextInt(DataBlockEncoding.values().length); + DataBlockEncoding encoding = DataBlockEncoding.values()[encodingOrdinal]; + setEncodingConf(encoding, rand.nextBoolean()); + writeSomeNewDataAndVerifyAllData(); + } + } + +} diff --git src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java new file mode 100644 index 0000000..101e409 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test all of the data block encoding algorithms for correctness. + * Most of the class generate data which will test different branches in code. 
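// Editor's sketch (not part of the patch): the administrative steps that
// TestChangingEncoding.setEncodingConf() above exercises -- switching an existing column family to
// FAST_DIFF encoding. The table and family names are placeholders; in practice you would fetch the
// table's existing HColumnDescriptor first rather than building a fresh one, so that other family
// settings are preserved.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;

public class ChangeEncodingExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    HColumnDescriptor hcd = new HColumnDescriptor("myCF");
    hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
    hcd.setEncodeOnDisk(true);   // also encode blocks as they are written to HFiles
    admin.disableTable("myTable");
    admin.modifyColumn("myTable", hcd);
    admin.enableTable("myTable");
    admin.close();
  }
}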
+ */ +@Category(LargeTests.class) +@RunWith(Parameterized.class) +public class TestDataBlockEncoders { + static int NUMBER_OF_KV = 10000; + static int NUM_RANDOM_SEEKS = 10000; + + private RedundantKVGenerator generator = new RedundantKVGenerator(); + private Random randomizer = new Random(42l); + + private final boolean includesMemstoreTS; + + @Parameters + public static Collection parameters() { + return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + } + + public TestDataBlockEncoders(boolean includesMemstoreTS) { + this.includesMemstoreTS = includesMemstoreTS; + } + + private void testAlgorithm(ByteBuffer dataset, DataBlockEncoder encoder) + throws IOException { + // encode + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(baos); + encoder.compressKeyValues(dataOut, dataset, includesMemstoreTS); + + // decode + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + DataInputStream dis = new DataInputStream(bais); + ByteBuffer actualDataset; + actualDataset = encoder.uncompressKeyValues(dis, includesMemstoreTS); + + dataset.rewind(); + actualDataset.rewind(); + + assertEquals("Encoding -> decoding gives different results for " + encoder, + dataset, actualDataset); + } + + /** + * Test data block encoding of empty KeyValue. + * @throws IOException On test failure. + */ + @Test + public void testEmptyKeyValues() throws IOException { + List kvList = new ArrayList(); + byte[] row = new byte[0]; + byte[] family = new byte[0]; + byte[] qualifier = new byte[0]; + byte[] value = new byte[0]; + kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value)); + kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value)); + testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, + includesMemstoreTS)); + } + + /** + * Test KeyValues with negative timestamp. + * @throws IOException On test failure. + */ + @Test + public void testNegativeTimestamps() throws IOException { + List kvList = new ArrayList(); + byte[] row = new byte[0]; + byte[] family = new byte[0]; + byte[] qualifier = new byte[0]; + byte[] value = new byte[0]; + kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value)); + kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value)); + testEncodersOnDataset( + RedundantKVGenerator.convertKvToByteBuffer(kvList, + includesMemstoreTS)); + } + + /** + * Test whether compression -> decompression gives the consistent results on + * pseudorandom sample. + * @throws IOException On test failure. + */ + @Test + public void testExecutionOnSample() throws IOException { + testEncodersOnDataset( + RedundantKVGenerator.convertKvToByteBuffer( + generator.generateTestKeyValues(NUMBER_OF_KV), + includesMemstoreTS)); + } + + /** + * Test seeking while file is encoded. 
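// Editor's sketch (not part of the patch): the round trip that testAlgorithm() above relies on --
// encode a flat buffer of KeyValues with a DataBlockEncoder and decode it back, expecting a
// byte-identical result. It only uses the encoder interface as exercised by the test.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;

public class EncodeDecodeRoundTrip {
  public static ByteBuffer roundTrip(DataBlockEncoder encoder,
      ByteBuffer rawKeyValues, boolean includesMemstoreTS) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    encoder.compressKeyValues(new DataOutputStream(baos), rawKeyValues,
        includesMemstoreTS);
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    return encoder.uncompressKeyValues(in, includesMemstoreTS);
  }
}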
+ */ + @Test + public void testSeekingOnSample() throws IOException{ + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); + ByteBuffer originalBuffer = + RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + + // create all seekers + List encodedSeekers = + new ArrayList(); + for (DataBlockEncoder encoder : dataBlockEncoders) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(baos); + encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS); + ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray()); + DataBlockEncoder.EncodedSeeker seeker = + encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS); + seeker.setCurrentBuffer(encodedBuffer); + encodedSeekers.add(seeker); + } + + // test it! + // try a few random seeks + for (boolean seekBefore : new boolean[] {false, true}) { + for (int i = 0 ; i < NUM_RANDOM_SEEKS ; ++i) { + int keyValueId; + if (!seekBefore) { + keyValueId = randomizer.nextInt(sampleKv.size()); + } else { + keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1; + } + + KeyValue keyValue = sampleKv.get(keyValueId); + checkSeekingConsistency(encodedSeekers, seekBefore, keyValue); + } + } + + // check edge cases + checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0)); + for (boolean seekBefore : new boolean[] {false, true}) { + checkSeekingConsistency(encodedSeekers, seekBefore, + sampleKv.get(sampleKv.size() - 1)); + KeyValue midKv = sampleKv.get(sampleKv.size() / 2); + KeyValue lastMidKv = midKv.createLastOnRowCol(); + checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv); + } + } + + /** + * Test iterating on encoded buffers. 
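// Editor's sketch (not part of the patch): how the tests above drive an EncodedSeeker over an
// already-encoded block -- position it on a buffer, seek to a key, then walk forward cell by cell.
// The startKey parameter and the counting loop are illustrative only.
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;

public class EncodedSeekerWalk {
  public static int countCellsFrom(DataBlockEncoder encoder,
      ByteBuffer encodedBlock, KeyValue startKey, boolean includesMemstoreTS) {
    DataBlockEncoder.EncodedSeeker seeker =
        encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS);
    seeker.setCurrentBuffer(encodedBlock);
    // Position at the requested key (seekBefore = false), then iterate to the end of the block.
    seeker.blockSeekTo(startKey.getBuffer(), startKey.getKeyOffset(),
        startKey.getKeyLength(), false);
    int count = 0;
    do {
      count++;
    } while (seeker.next());
    return count;
  }
}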
+ */ + @Test + public void testNextOnSample() { + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); + ByteBuffer originalBuffer = + RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + + for (DataBlockEncoder encoder : dataBlockEncoders) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(baos); + try { + encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Bug while encoding using '%s'", encoder.toString()), e); + } + + ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray()); + DataBlockEncoder.EncodedSeeker seeker = + encoder.createSeeker(KeyValue.KEY_COMPARATOR, includesMemstoreTS); + seeker.setCurrentBuffer(encodedBuffer); + int i = 0; + do { + KeyValue expectedKeyValue = sampleKv.get(i); + ByteBuffer keyValue = seeker.getKeyValueBuffer(); + if (0 != Bytes.compareTo( + keyValue.array(), keyValue.arrayOffset(), keyValue.limit(), + expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(), + expectedKeyValue.getLength())) { + + int commonPrefix = 0; + byte[] left = keyValue.array(); + byte[] right = expectedKeyValue.getBuffer(); + int leftOff = keyValue.arrayOffset(); + int rightOff = expectedKeyValue.getOffset(); + int length = Math.min(keyValue.limit(), expectedKeyValue.getLength()); + while (commonPrefix < length && + left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) { + commonPrefix++; + } + + fail(String.format( + "next() produces wrong results " + + "encoder: %s i: %d commonPrefix: %d" + + "\n expected %s\n actual %s", + encoder.toString(), i, commonPrefix, + Bytes.toStringBinary(expectedKeyValue.getBuffer(), + expectedKeyValue.getOffset(), expectedKeyValue.getLength()), + Bytes.toStringBinary(keyValue))); + } + i++; + } while (seeker.next()); + } + } + + /** + * Test whether the decompression of first key is implemented correctly. 
+ */ + @Test + public void testFirstKeyInBlockOnSample() { + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); + ByteBuffer originalBuffer = + RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + + for (DataBlockEncoder encoder : dataBlockEncoders) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(baos); + try { + encoder.compressKeyValues(dataOut, originalBuffer, includesMemstoreTS); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Bug while encoding using '%s'", encoder.toString()), e); + } + + ByteBuffer encodedBuffer = ByteBuffer.wrap(baos.toByteArray()); + ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer); + KeyValue firstKv = sampleKv.get(0); + if (0 != Bytes.compareTo( + keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(), + firstKv.getBuffer(), firstKv.getKeyOffset(), + firstKv.getKeyLength())) { + + int commonPrefix = 0; + int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength()); + while (commonPrefix < length && + keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == + firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) { + commonPrefix++; + } + fail(String.format("Bug in '%s' commonPrefix %d", + encoder.toString(), commonPrefix)); + } + } + } + + private void checkSeekingConsistency( + List encodedSeekers, boolean seekBefore, + KeyValue keyValue) { + ByteBuffer expectedKeyValue = null; + ByteBuffer expectedKey = null; + ByteBuffer expectedValue = null; + + for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) { + seeker.blockSeekTo(keyValue.getBuffer(), + keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore); + seeker.rewind(); + + ByteBuffer actualKeyValue = seeker.getKeyValueBuffer(); + ByteBuffer actualKey = seeker.getKey(); + ByteBuffer actualValue = seeker.getValue(); + + if (expectedKeyValue != null) { + assertEquals(expectedKeyValue, actualKeyValue); + } else { + expectedKeyValue = actualKeyValue; + } + + if (expectedKey != null) { + assertEquals(expectedKey, actualKey); + } else { + expectedKey = actualKey; + } + + if (expectedValue != null) { + assertEquals(expectedValue, actualValue); + } else { + expectedValue = actualValue; + } + } + } + + private void testEncodersOnDataset(ByteBuffer onDataset) + throws IOException{ + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity()); + onDataset.rewind(); + dataset.put(onDataset); + onDataset.rewind(); + dataset.flip(); + + for (DataBlockEncoder encoder : dataBlockEncoders) { + testAlgorithm(dataset, encoder); + + // ensure that dataset is unchanged + dataset.rewind(); + assertEquals("Input of two methods is changed", onDataset, dataset); + } + } +} diff --git src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java new file mode 100644 index 0000000..19b575c --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.encoding; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.LoadTestKVGenerator; +import org.apache.hadoop.hbase.util.MultiThreadedWriter; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Tests encoded seekers by loading and reading values. 
+ */ +@Category(SmallTests.class) +@RunWith(Parameterized.class) +public class TestEncodedSeekers { + + private static final String TABLE_NAME = "encodedSeekersTable"; + private static final String CF_NAME = "encodedSeekersCF"; + private static final byte[] CF_BYTES = Bytes.toBytes(CF_NAME); + private static final int MAX_VERSIONS = 5; + + private static final int MIN_VALUE_SIZE = 30; + private static final int MAX_VALUE_SIZE = 60; + private static final int NUM_ROWS = 20; + private static final int NUM_COLS_PER_ROW = 2; + private static final int NUM_HFILES = 2; + private static final int NUM_ROWS_PER_FLUSH = NUM_ROWS / NUM_HFILES; + + private final HBaseTestingUtility testUtil = new HBaseTestingUtility(); + private final DataBlockEncoding encoding; + + @Parameters + public static Collection parameters() { + List paramList = new ArrayList(); + for (DataBlockEncoding encoding : DataBlockEncoding.values()) { + paramList.add(new Object[] { encoding }); + } + return paramList; + } + + public TestEncodedSeekers(DataBlockEncoding encoding) { + this.encoding = encoding; + } + + @Test + public void testEncodedSeeker() throws IOException { + System.err.println("Testing encoded seekers for encoding " + encoding); + HRegion region = testUtil.createTestRegion(TABLE_NAME, CF_NAME, + Algorithm.NONE, + BloomType.NONE, MAX_VERSIONS, HColumnDescriptor.DEFAULT_BLOCKCACHE, + HFile.DEFAULT_BLOCKSIZE, encoding); + LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator( + MIN_VALUE_SIZE, MAX_VALUE_SIZE); + + // Write + for (int i = 0; i < NUM_ROWS; ++i) { + byte[] key = MultiThreadedWriter.longToByteArrayKey(i); + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + Put put = new Put(key); + String colAsStr = String.valueOf(j); + byte[] value = dataGenerator.generateRandomSizeValue(i, colAsStr); + put.add(CF_BYTES, Bytes.toBytes(colAsStr), value); + region.put(put); + } + if (i % NUM_ROWS_PER_FLUSH == 0) { + region.flushcache(); + } + } + + // Read + for (int i = 0; i < NUM_ROWS; ++i) { + final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i); + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + System.err.println("Reading row " + i + ", column " + j); + final String qualStr = String.valueOf(j); + final byte[] qualBytes = Bytes.toBytes(qualStr); + Get get = new Get(rowKey); + get.addColumn(CF_BYTES, qualBytes); + Result result = region.get(get, null); + assertEquals(1, result.size()); + assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr, + result.getValue(CF_BYTES, qualBytes))); + } + } + } + +} diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java index 7a4b153..61ce077 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java @@ -41,9 +41,15 @@ import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; public class CacheTestUtils { - /*Just checks if heapsize grows when something is cached, and gets smaller when the same object is evicted*/ + private static final boolean includesMemstoreTS = true; - public static void testHeapSizeChanges(final BlockCache toBeTested, final int blockSize){ + /** + * Just checks if heapsize grows when something is cached, and gets smaller + * when the same object is evicted + */ + + public static void testHeapSizeChanges(final BlockCache toBeTested, + final int blockSize) { HFileBlockPair[] blocks = generateHFileBlocks(blockSize, 1); long heapSize = ((HeapSize) 
toBeTested).heapSize(); toBeTested.cacheBlock(blocks[0].blockName, blocks[0].block); @@ -316,7 +322,8 @@ public class CacheTestUtils { HFileBlock generated = new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, - prevBlockOffset, cachedBuffer, false, blockSize); + prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER, + blockSize, includesMemstoreTS); String strKey; /* No conflicting keys */ diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java index 5ee825b..25f1bbe 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java @@ -33,6 +33,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.junit.After; @@ -42,6 +43,7 @@ import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; + import static org.junit.Assert.*; /** @@ -61,10 +63,13 @@ public class TestCacheOnWrite { private FileSystem fs; private Random rand = new Random(12983177L); private Path storeFilePath; - private Compression.Algorithm compress; - private CacheOnWriteType cowType; private BlockCache blockCache; - private String testName; + private String testDescription; + + private final CacheOnWriteType cowType; + private final Compression.Algorithm compress; + private final BlockEncoderTestType encoderType; + private final HFileDataBlockEncoder encoder; private static final int DATA_BLOCK_SIZE = 2048; private static final int NUM_KV = 25000; @@ -76,49 +81,87 @@ public class TestCacheOnWrite { KeyValue.Type.values().length - 2; private static enum CacheOnWriteType { - DATA_BLOCKS(BlockType.DATA, CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY), - BLOOM_BLOCKS(BlockType.BLOOM_CHUNK, - CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY), - INDEX_BLOCKS(BlockType.LEAF_INDEX, - CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY); + DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, + BlockType.DATA, BlockType.ENCODED_DATA), + BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, + BlockType.BLOOM_CHUNK), + INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, + BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX); private final String confKey; - private final BlockType inlineBlockType; + private final BlockType blockType1; + private final BlockType blockType2; + + private CacheOnWriteType(String confKey, BlockType blockType) { + this(confKey, blockType, blockType); + } - private CacheOnWriteType(BlockType inlineBlockType, String confKey) { - this.inlineBlockType = inlineBlockType; + private CacheOnWriteType(String confKey, BlockType blockType1, + BlockType blockType2) { + this.blockType1 = blockType1; + this.blockType2 = blockType2; this.confKey = confKey; } public boolean shouldBeCached(BlockType blockType) { - return blockType == inlineBlockType - || blockType == BlockType.INTERMEDIATE_INDEX - && inlineBlockType == BlockType.LEAF_INDEX; + return blockType == blockType1 || blockType == blockType2; } public void modifyConf(Configuration conf) { - for (CacheOnWriteType cowType : CacheOnWriteType.values()) + for (CacheOnWriteType 
cowType : CacheOnWriteType.values()) { conf.setBoolean(cowType.confKey, cowType == this); + } } } + private static final DataBlockEncoding ENCODING_ALGO = + DataBlockEncoding.PREFIX; + + /** Provides fancy names for three combinations of two booleans */ + private static enum BlockEncoderTestType { + NO_BLOCK_ENCODING(false, false), + BLOCK_ENCODING_IN_CACHE_ONLY(false, true), + BLOCK_ENCODING_EVERYWHERE(true, true); + + private final boolean encodeOnDisk; + private final boolean encodeInCache; + + BlockEncoderTestType(boolean encodeOnDisk, boolean encodeInCache) { + this.encodeOnDisk = encodeOnDisk; + this.encodeInCache = encodeInCache; + } + + public HFileDataBlockEncoder getEncoder() { + return new HFileDataBlockEncoderImpl( + encodeOnDisk ? ENCODING_ALGO : DataBlockEncoding.NONE, + encodeInCache ? ENCODING_ALGO : DataBlockEncoding.NONE); + } + } + public TestCacheOnWrite(CacheOnWriteType cowType, - Compression.Algorithm compress) { + Compression.Algorithm compress, BlockEncoderTestType encoderType) { this.cowType = cowType; this.compress = compress; - testName = "[cacheOnWrite=" + cowType + ", compress=" + compress + "]"; - System.out.println(testName); + this.encoderType = encoderType; + this.encoder = encoderType.getEncoder(); + testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress + + ", encoderType=" + encoderType + "]"; + System.out.println(testDescription); } @Parameters public static Collection getParameters() { List cowTypes = new ArrayList(); - for (CacheOnWriteType cowType : CacheOnWriteType.values()) + for (CacheOnWriteType cowType : CacheOnWriteType.values()) { for (Compression.Algorithm compress : HBaseTestingUtility.COMPRESSION_ALGORITHMS) { - cowTypes.add(new Object[] { cowType, compress }); + for (BlockEncoderTestType encoderType : + BlockEncoderTestType.values()) { + cowTypes.add(new Object[] { cowType, compress, encoderType }); + } } + } return cowTypes; } @@ -139,7 +182,6 @@ public class TestCacheOnWrite { fs = FileSystem.get(conf); cacheConf = new CacheConfig(conf); blockCache = cacheConf.getBlockCache(); - System.out.println("setUp()"); } @After @@ -155,11 +197,13 @@ public class TestCacheOnWrite { } private void readStoreFile() throws IOException { - HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs, - storeFilePath, cacheConf); + HFileReaderV2 reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs, + storeFilePath, cacheConf, encoder.getEncodingInCache()); LOG.info("HFile information: " + reader); - HFileScanner scanner = reader.getScanner(false, false); - assertTrue(testName, scanner.seekTo()); + final boolean cacheBlocks = false; + final boolean pread = false; + HFileScanner scanner = reader.getScanner(cacheBlocks, pread); + assertTrue(testDescription, scanner.seekTo()); long offset = 0; HFileBlock prevBlock = null; @@ -174,10 +218,11 @@ public class TestCacheOnWrite { // Flags: don't cache the block, use pread, this is not a compaction. 
HFileBlock block = reader.readBlock(offset, onDiskSize, false, true, false); - BlockCacheKey blockCacheKey = HFile.getBlockCacheKey(reader.getName(), offset); + BlockCacheKey blockCacheKey = HFile.getBlockCacheKey(reader.getName(), + offset); boolean isCached = blockCache.getBlock(blockCacheKey, true) != null; boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType()); - assertEquals(testName + " " + block, shouldBeCached, isCached); + assertEquals(testDescription + " " + block, shouldBeCached, isCached); prevBlock = block; offset += block.getOnDiskSizeWithHeader(); BlockType bt = block.getBlockType(); @@ -187,8 +232,10 @@ public class TestCacheOnWrite { LOG.info("Block count by type: " + blockCountByType); String countByType = blockCountByType.toString(); - assertEquals( - "{DATA=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}", + BlockType cachedDataBlockType = + encoderType.encodeInCache ? BlockType.ENCODED_DATA : BlockType.DATA; + assertEquals("{" + cachedDataBlockType + + "=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}", countByType); reader.close(); @@ -213,9 +260,10 @@ public class TestCacheOnWrite { public void writeStoreFile() throws IOException { Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(), "test_cache_on_write"); - StoreFile.Writer sfw = StoreFile.createWriter(fs, storeFileParentDir, - DATA_BLOCK_SIZE, compress, KeyValue.COMPARATOR, conf, - cacheConf, StoreFile.BloomType.ROWCOL, NUM_KV); + StoreFile.Writer sfw = + StoreFile.createWriter(fs, storeFileParentDir, DATA_BLOCK_SIZE, + compress, encoder, KeyValue.COMPARATOR, conf, cacheConf, + StoreFile.BloomType.ROWCOL, NUM_KV); final int rowLen = 32; for (int i = 0; i < NUM_KV; ++i) { @@ -236,7 +284,6 @@ public class TestCacheOnWrite { storeFilePath = sfw.getPath(); } - @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java index da0067f..3312cfa 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -158,7 +158,7 @@ public class TestHFile extends HBaseTestCase { writeRecords(writer); fout.close(); FSDataInputStream fin = fs.open(ncTFile); - Reader reader = HFile.createReader(ncTFile, fs.open(ncTFile), + Reader reader = HFile.createReaderFromStream(ncTFile, fs.open(ncTFile), fs.getFileStatus(ncTFile).getLen(), cacheConf); System.out.println(cacheConf.toString()); // Load up the index. @@ -236,7 +236,7 @@ public class TestHFile extends HBaseTestCase { writer.close(); fout.close(); FSDataInputStream fin = fs.open(mFile); - Reader reader = HFile.createReader(mFile, fs.open(mFile), + Reader reader = HFile.createReaderFromStream(mFile, fs.open(mFile), this.fs.getFileStatus(mFile).getLen(), cacheConf); reader.loadFileInfo(); // No data -- this should return false. 
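// Editor's sketch (not part of the patch): the two-argument HFileDataBlockEncoderImpl constructor
// used by BlockEncoderTestType above separates the on-disk encoding from the in-cache encoding,
// e.g. encoding blocks only in the block cache while leaving the HFile itself unencoded.
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;

public class EncoderConfigSketch {
  // Blocks stay unencoded in the HFile but are PREFIX-encoded in the block cache.
  static HFileDataBlockEncoder cacheOnly =
      new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE,
          DataBlockEncoding.PREFIX);

  // Blocks are PREFIX-encoded both on disk and in the block cache.
  static HFileDataBlockEncoder everywhere =
      new HFileDataBlockEncoderImpl(DataBlockEncoding.PREFIX,
          DataBlockEncoding.PREFIX);
}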
diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index 742ffe8..6cbf9c3 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -27,6 +27,8 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -45,16 +47,24 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.DoubleOutputStream; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.compress.Compressor; import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; @Category(MediumTests.class) +@RunWith(Parameterized.class) public class TestHFileBlock { // change this value to activate more logs private static final boolean detailedLogging = false; @@ -69,14 +79,29 @@ public class TestHFileBlock { static final Compression.Algorithm[] GZIP_ONLY = { GZ }; private static final int NUM_TEST_BLOCKS = 1000; - private static final int NUM_READER_THREADS = 26; + // Used to generate KeyValues + private static int NUM_KEYVALUES = 50; + private static int FIELD_LENGTH = 10; + private static float CHANCE_TO_REPEAT = 0.6f; + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private FileSystem fs; private int uncompressedSizeV1; + private final boolean includesMemstoreTS; + + public TestHFileBlock(boolean includesMemstoreTS) { + this.includesMemstoreTS = includesMemstoreTS; + } + + @Parameters + public static Collection parameters() { + return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + } + @Before public void setUp() throws IOException { fs = FileSystem.get(TEST_UTIL.getConfiguration()); @@ -88,6 +113,72 @@ public class TestHFileBlock { dos.writeInt(i / 100); } + private int writeTestKeyValues(OutputStream dos, int seed) + throws IOException { + List keyValues = new ArrayList(); + Random randomizer = new Random(42l + seed); // just any fixed number + + // generate keyValues + for (int i = 0 ; i < NUM_KEYVALUES ; ++i) { + byte[] row; + long timestamp; + byte[] family; + byte[] qualifier; + byte[] value; + + // generate it or repeat, it should compress well + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + row = keyValues.get(randomizer.nextInt(keyValues.size())).getRow(); + } else { + row = new byte[FIELD_LENGTH]; + randomizer.nextBytes(row); + } + if (0 == i) { + family = new byte[FIELD_LENGTH]; + randomizer.nextBytes(family); + } else { + family = keyValues.get(0).getFamily(); + } + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + qualifier = keyValues.get( + randomizer.nextInt(keyValues.size())).getQualifier(); + } else { + qualifier = new byte[FIELD_LENGTH]; + randomizer.nextBytes(qualifier); + } + if (0 < i && 
randomizer.nextFloat() < CHANCE_TO_REPEAT) { + value = keyValues.get(randomizer.nextInt(keyValues.size())).getValue(); + } else { + value = new byte[FIELD_LENGTH]; + randomizer.nextBytes(value); + } + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + timestamp = keyValues.get( + randomizer.nextInt(keyValues.size())).getTimestamp(); + } else { + timestamp = randomizer.nextLong(); + } + + keyValues.add(new KeyValue(row, family, qualifier, timestamp, value)); + } + + // sort it and write to stream + int totalSize = 0; + Collections.sort(keyValues, KeyValue.COMPARATOR); + DataOutputStream dataOutputStream = new DataOutputStream(dos); + for (KeyValue kv : keyValues) { + totalSize += kv.getLength(); + dataOutputStream.write(kv.getBuffer(), kv.getOffset(), kv.getLength()); + if (includesMemstoreTS) { + long memstoreTS = randomizer.nextLong(); + WritableUtils.writeVLong(dataOutputStream, memstoreTS); + totalSize += WritableUtils.getVIntSize(memstoreTS); + } + } + + return totalSize; + } + public byte[] createTestV1Block(Compression.Algorithm algo) throws IOException { Compressor compressor = algo.getCompressor(); @@ -105,8 +196,9 @@ public class TestHFileBlock { private byte[] createTestV2Block(Compression.Algorithm algo) throws IOException { final BlockType blockType = BlockType.DATA; - HFileBlock.Writer hbw = new HFileBlock.Writer(algo); - DataOutputStream dos = hbw.startWriting(blockType, false); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, + includesMemstoreTS); + DataOutputStream dos = hbw.startWriting(blockType); writeTestBlockContents(dos); byte[] headerAndData = hbw.getHeaderAndData(); assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader()); @@ -194,10 +286,11 @@ public class TestHFileBlock { Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo); FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, + includesMemstoreTS); long totalSize = 0; for (int blockId = 0; blockId < 2; ++blockId) { - DataOutputStream dos = hbw.startWriting(BlockType.DATA, false); + DataOutputStream dos = hbw.startWriting(BlockType.DATA); for (int i = 0; i < 1234; ++i) dos.writeInt(i); hbw.writeHeaderAndData(os); @@ -240,6 +333,136 @@ public class TestHFileBlock { } } + /** + * Test encoding/decoding data blocks. + * @throws IOException a bug or a problem with temporary files. 
+ */ + @Test + public void testDataBlockEncoding() throws IOException { + final int numBlocks = 5; + for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { + for (boolean pread : new boolean[] { false, true }) { + for (DataBlockEncoding encoding : DataBlockEncoding.values()) { + Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + + algo + "_" + encoding.toString()); + FSDataOutputStream os = fs.create(path); + HFileDataBlockEncoder dataBlockEncoder = + new HFileDataBlockEncoderImpl(encoding); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, dataBlockEncoder, + includesMemstoreTS); + long totalSize = 0; + final List encodedSizes = new ArrayList(); + final List encodedBlocks = new ArrayList(); + for (int blockId = 0; blockId < numBlocks; ++blockId) { + writeEncodedBlock(encoding, hbw, encodedSizes, encodedBlocks, + blockId); + + hbw.writeHeaderAndData(os); + totalSize += hbw.getOnDiskSizeWithHeader(); + } + os.close(); + + FSDataInputStream is = fs.open(path); + HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, algo, + totalSize); + hbr.setDataBlockEncoder(dataBlockEncoder); + hbr.setIncludesMemstoreTS(includesMemstoreTS); + + HFileBlock b; + int pos = 0; + for (int blockId = 0; blockId < numBlocks; ++blockId) { + b = hbr.readBlockData(pos, -1, -1, pread); + b.sanityCheck(); + pos += b.getOnDiskSizeWithHeader(); + + assertEquals((int) encodedSizes.get(blockId), + b.getUncompressedSizeWithoutHeader()); + ByteBuffer actualBuffer = b.getBufferWithoutHeader(); + if (encoding != DataBlockEncoding.NONE) { + // We expect a two-byte big-endian encoding id. + assertEquals(0, actualBuffer.get(0)); + assertEquals(encoding.getId(), actualBuffer.get(1)); + actualBuffer.position(2); + actualBuffer = actualBuffer.slice(); + } + + ByteBuffer expectedBuffer = encodedBlocks.get(blockId); + expectedBuffer.rewind(); + + // test if content matches, produce nice message + assertBuffersEqual(expectedBuffer, actualBuffer, algo, encoding, + pread); + } + is.close(); + } + } + } + } + + private void writeEncodedBlock(DataBlockEncoding encoding, + HFileBlock.Writer hbw, final List encodedSizes, + final List encodedBlocks, int blockId) throws IOException { + DataOutputStream dos = hbw.startWriting(BlockType.DATA); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DoubleOutputStream doubleOutputStream = + new DoubleOutputStream(dos, baos); + + final int rawBlockSize = writeTestKeyValues(doubleOutputStream, + blockId); + + ByteBuffer rawBuf = ByteBuffer.wrap(baos.toByteArray()); + rawBuf.rewind(); + + final int encodedSize; + final ByteBuffer encodedBuf; + if (encoding == DataBlockEncoding.NONE) { + encodedSize = rawBlockSize; + encodedBuf = rawBuf; + } else { + ByteArrayOutputStream encodedOut = new ByteArrayOutputStream(); + encoding.getEncoder().compressKeyValues( + new DataOutputStream(encodedOut), + rawBuf.duplicate(), includesMemstoreTS); + // We need to account for the two-byte encoding algorithm ID that + // comes after the 24-byte block header but before encoded KVs. 
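// (Editor's aside, illustration only; not part of the patch.) The two-byte id
// described above is a big-endian short written right after the block header,
// which is why the test earlier checked byte 0 == 0 and byte 1 == encoding.getId().
// A minimal sketch, assuming ids fit in one byte, as the test itself does:
//   ByteBuffer idBuf = ByteBuffer.allocate(DataBlockEncoding.ID_SIZE); // 2 bytes
//   idBuf.putShort((short) encoding.getId()); // ByteBuffer is big-endian by default
//   assert idBuf.get(0) == 0;
//   assert idBuf.get(1) == (byte) encoding.getId();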
+ encodedSize = encodedOut.size() + DataBlockEncoding.ID_SIZE; + encodedBuf = ByteBuffer.wrap(encodedOut.toByteArray()); + } + encodedSizes.add(encodedSize); + encodedBlocks.add(encodedBuf); + } + + private void assertBuffersEqual(ByteBuffer expectedBuffer, + ByteBuffer actualBuffer, Compression.Algorithm compression, + DataBlockEncoding encoding, boolean pread) { + if (!actualBuffer.equals(expectedBuffer)) { + int prefix = 0; + int minLimit = Math.min(expectedBuffer.limit(), actualBuffer.limit()); + while (prefix < minLimit && + expectedBuffer.get(prefix) == actualBuffer.get(prefix)) { + prefix++; + } + + fail(String.format( + "Content mismatch for compression %s, encoding %s, " + + "pread %s, commonPrefix %d, expected %s, got %s", + compression, encoding, pread, prefix, + nextBytesToStr(expectedBuffer, prefix), + nextBytesToStr(actualBuffer, prefix))); + } + } + + /** + * Convert the next few bytes in the given buffer at the given position to + * a string. Used for error messages. + */ + private static String nextBytesToStr(ByteBuffer buf, int pos) { + int maxBytes = buf.limit() - pos; + int numBytes = Math.min(16, maxBytes); + return Bytes.toStringBinary(buf.array(), buf.arrayOffset() + pos, + numBytes) + (numBytes < maxBytes ? "..." : ""); + } + @Test public void testPreviousOffset() throws IOException { for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { @@ -446,13 +669,17 @@ public class TestHFileBlock { ) throws IOException { boolean cacheOnWrite = expectedContents != null; FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo); + HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo, null, + includesMemstoreTS); Map prevOffsetByType = new HashMap(); long totalSize = 0; for (int i = 0; i < NUM_TEST_BLOCKS; ++i) { int blockTypeOrdinal = rand.nextInt(BlockType.values().length); + if (blockTypeOrdinal == BlockType.ENCODED_DATA.ordinal()) { + blockTypeOrdinal = BlockType.DATA.ordinal(); + } BlockType bt = BlockType.values()[blockTypeOrdinal]; - DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite); + DataOutputStream dos = hbw.startWriting(bt); for (int j = 0; j < rand.nextInt(500); ++j) { // This might compress well.
dos.writeShort(i + 1); @@ -501,7 +728,7 @@ public class TestHFileBlock { byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size]; ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size); HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - true, -1); + HFileBlock.FILL_HEADER, -1, includesMemstoreTS); long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(buf.getClass(), true) + HFileBlock.HEADER_SIZE + size); diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java index d873086..91947b9 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java @@ -20,6 +20,10 @@ package org.apache.hadoop.hbase.io.hfile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -44,7 +48,6 @@ import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader; import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; - import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -52,8 +55,6 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -import static org.junit.Assert.*; - @RunWith(Parameterized.class) @Category(MediumTests.class) public class TestHFileBlockIndex { @@ -92,6 +93,8 @@ public class TestHFileBlockIndex { private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 }; + private static final boolean includesMemstoreTS = true; + static { assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length; assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length; @@ -210,13 +213,14 @@ public class TestHFileBlockIndex { private void writeWholeIndex() throws IOException { assertEquals(0, keys.size()); - HFileBlock.Writer hbw = new HFileBlock.Writer(compr); + HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null, + includesMemstoreTS); FSDataOutputStream outputStream = fs.create(path); HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null); for (int i = 0; i < NUM_DATA_BLOCKS; ++i) { - hbw.startWriting(BlockType.DATA, false).write( + hbw.startWriting(BlockType.DATA).write( String.valueOf(rand.nextInt(1000)).getBytes()); long blockOffset = outputStream.getPos(); hbw.writeHeaderAndData(outputStream); @@ -251,7 +255,7 @@ public class TestHFileBlockIndex { boolean isClosing) throws IOException { while (biw.shouldWriteBlock(isClosing)) { long offset = outputStream.getPos(); - biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType(), false)); + biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType())); hbw.writeHeaderAndData(outputStream); biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(), hbw.getUncompressedSizeWithoutHeader()); @@ -479,7 +483,7 @@ public class TestHFileBlockIndex { { HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).createWriter(fs, - hfilePath, SMALL_BLOCK_SIZE, compr, KeyValue.KEY_COMPARATOR); + hfilePath, SMALL_BLOCK_SIZE, compr, null, KeyValue.KEY_COMPARATOR); Random rand = new Random(19231737); for (int i = 0; i < NUM_KV; ++i) { diff --git 
src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java new file mode 100644 index 0000000..9a4673e --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestHFileDataBlockEncoder { + private Configuration conf; + private final HBaseTestingUtility TEST_UTIL = + new HBaseTestingUtility(); + private HFileDataBlockEncoderImpl blockEncoder; + private RedundantKVGenerator generator = new RedundantKVGenerator(); + private SchemaConfigured UNKNOWN_TABLE_AND_CF = + SchemaConfigured.createUnknown(); + private boolean includesMemstoreTS; + + /** + * Create test for given data block encoding configuration. + * @param blockEncoder What kind of encoding policy will be used. + */ + public TestHFileDataBlockEncoder(HFileDataBlockEncoderImpl blockEncoder, + boolean includesMemstoreTS) { + this.blockEncoder = blockEncoder; + this.includesMemstoreTS = includesMemstoreTS; + System.err.println("On-disk encoding: " + blockEncoder.getEncodingOnDisk() + + ", in-cache encoding: " + blockEncoder.getEncodingInCache() + + ", includesMemstoreTS: " + includesMemstoreTS); + } + + /** + * Preparation before JUnit test. + */ + @Before + public void setUp() { + conf = TEST_UTIL.getConfiguration(); + SchemaMetrics.configureGlobally(conf); + } + + /** + * Cleanup after JUnit test. + */ + @After + public void tearDown() throws IOException { + TEST_UTIL.cleanupTestDir(); + } + + /** + * Test putting and taking out blocks into cache with different + * encoding options. 
+ */ + @Test + public void testEncodingWithCache() { + HFileBlock block = getSampleHFileBlock(); + LruBlockCache blockCache = + new LruBlockCache(8 * 1024 * 1024, 32 * 1024); + HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(block, false); + BlockCacheKey cacheKey = new BlockCacheKey("test", 0); + blockCache.cacheBlock(cacheKey, cacheBlock); + + HeapSize heapSize = blockCache.getBlock(cacheKey, false); + assertTrue(heapSize instanceof HFileBlock); + + HFileBlock returnedBlock = (HFileBlock) heapSize;; + + if (blockEncoder.getEncodingInCache() == + DataBlockEncoding.NONE) { + assertEquals(block.getBufferWithHeader(), + returnedBlock.getBufferWithHeader()); + } else { + if (BlockType.ENCODED_DATA != returnedBlock.getBlockType()) { + System.out.println(blockEncoder); + } + assertEquals(BlockType.ENCODED_DATA, returnedBlock.getBlockType()); + } + } + + /** + * Test writing to disk. + */ + @Test + public void testEncodingWritePath() { + // usually we have just block without headers, but don't complicate that + HFileBlock block = getSampleHFileBlock(); + Pair result = + blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), + includesMemstoreTS); + + int size = result.getFirst().limit() - HFileBlock.HEADER_SIZE; + HFileBlock blockOnDisk = new HFileBlock(result.getSecond(), + size, size, -1, result.getFirst(), HFileBlock.FILL_HEADER, 0, + includesMemstoreTS); + + if (blockEncoder.getEncodingOnDisk() != + DataBlockEncoding.NONE) { + assertEquals(BlockType.ENCODED_DATA, blockOnDisk.getBlockType()); + assertEquals(blockEncoder.getEncodingOnDisk().getId(), + blockOnDisk.getDataBlockEncodingId()); + } else { + assertEquals(BlockType.DATA, blockOnDisk.getBlockType()); + } + } + + /** + * Test converting blocks from disk to cache format. + */ + @Test + public void testEncodingReadPath() { + HFileBlock origBlock = getSampleHFileBlock(); + blockEncoder.diskToCacheFormat(origBlock, false); + } + + private HFileBlock getSampleHFileBlock() { + ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer( + generator.generateTestKeyValues(60), includesMemstoreTS); + int size = keyValues.limit(); + ByteBuffer buf = ByteBuffer.allocate(size + HFileBlock.HEADER_SIZE); + buf.position(HFileBlock.HEADER_SIZE); + keyValues.rewind(); + buf.put(keyValues); + HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, 0, includesMemstoreTS); + UNKNOWN_TABLE_AND_CF.passSchemaMetricsTo(b); + return b; + } + + /** + * @return All possible data block encoding configurations + */ + @Parameters + public static Collection getAllConfigurations() { + List configurations = + new ArrayList(); + + for (DataBlockEncoding diskAlgo : DataBlockEncoding.values()) { + for (DataBlockEncoding cacheAlgo : DataBlockEncoding.values()) { + if (diskAlgo != cacheAlgo && diskAlgo != DataBlockEncoding.NONE) { + // We allow (1) the same encoding on disk and in cache, and + // (2) some encoding in cache but no encoding on disk (for testing). 
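// (Editor's note, illustration only; not part of the patch.) The filter above
// keeps exactly these (on-disk, in-cache) pairs:
//   (NONE, NONE)          - no encoding anywhere
//   (NONE, any encoding)  - encoded blocks in the block cache only
//   (X, X)                - the same encoding on disk and in cache
// Any other pair, i.e. an on-disk encoding that differs from the in-cache
// encoding, reaches the 'continue' below and is skipped.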
+ continue; + } + for (boolean includesMemstoreTS : new boolean[] {false, true}) { + configurations.add(new Object[] { + new HFileDataBlockEncoderImpl(diskAlgo, cacheAlgo), + new Boolean(includesMemstoreTS)}); + } + } + } + + return configurations; + } +} diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java index 59896c5..66d9fce 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java @@ -241,7 +241,7 @@ public class TestHFilePerformance extends TestCase { FSDataInputStream fin = fs.open(path); if ("HFile".equals(fileType)){ - HFile.Reader reader = HFile.createReader(path, fs.open(path), + HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path), fs.getFileStatus(path).getLen(), new CacheConfig(conf)); reader.loadFileInfo(); switch (method) { diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java index a44d656..7203d2b 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java @@ -167,7 +167,7 @@ public class TestHFileSeek extends TestCase { int miss = 0; long totalBytes = 0; FSDataInputStream fsdis = fs.open(path); - Reader reader = HFile.createReader(path, fsdis, + Reader reader = HFile.createReaderFromStream(path, fsdis, fs.getFileStatus(path).getLen(), new CacheConfig(conf)); reader.loadFileInfo(); KeySampler kSampler = diff --git src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java index 474cbcf..5ff4dfd 100644 --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java @@ -76,7 +76,7 @@ public class TestHFileWriterV2 { final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ; HFileWriterV2 writer = new HFileWriterV2(conf, new CacheConfig(conf), fs, - hfilePath, 4096, COMPRESS_ALGO, KeyValue.KEY_COMPARATOR); + hfilePath, 4096, COMPRESS_ALGO, null, KeyValue.KEY_COMPARATOR); long totalKeyLength = 0; long totalValueLength = 0; @@ -125,10 +125,12 @@ public class TestHFileWriterV2 { new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize); // Comparator class name is stored in the trailer in version 2. 
RawComparator comparator = trailer.createComparator(); - HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator, - trailer.getNumDataIndexLevels()); - HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader( - Bytes.BYTES_RAWCOMPARATOR, 1); + HFileBlockIndex.BlockIndexReader dataBlockIndexReader = + new HFileBlockIndex.BlockIndexReader(comparator, + trailer.getNumDataIndexLevels()); + HFileBlockIndex.BlockIndexReader metaBlockIndexReader = + new HFileBlockIndex.BlockIndexReader( + Bytes.BYTES_RAWCOMPARATOR, 1); HFileBlock.BlockIterator blockIter = blockReader.blockRange( trailer.getLoadOnOpenDataOffset(), @@ -146,8 +148,10 @@ public class TestHFileWriterV2 { // File info FileInfo fileInfo = new FileInfo(); fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO)); - byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION); - boolean includeMemstoreTS = (keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0); + byte [] keyValueFormatVersion = fileInfo.get( + HFileWriterV2.KEY_VALUE_VERSION); + boolean includeMemstoreTS = keyValueFormatVersion != null && + Bytes.toInt(keyValueFormatVersion) > 0; // Counters for the number of key/value pairs and the number of blocks int entriesRead = 0; diff --git src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java index e3bc323..3b1d3fc 100644 --- src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java +++ src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java @@ -133,6 +133,8 @@ public class TestImportExport { 5, /* versions */ true /* keep deleted cells */, HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, HColumnDescriptor.DEFAULT_IN_MEMORY, HColumnDescriptor.DEFAULT_BLOCKCACHE, HColumnDescriptor.DEFAULT_BLOCKSIZE, @@ -179,6 +181,8 @@ public class TestImportExport { 5, /* versions */ true /* keep deleted cells */, HColumnDescriptor.DEFAULT_COMPRESSION, + HColumnDescriptor.DEFAULT_ENCODE_ON_DISK, + HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING, HColumnDescriptor.DEFAULT_IN_MEMORY, HColumnDescriptor.DEFAULT_BLOCKCACHE, HColumnDescriptor.DEFAULT_BLOCKSIZE, diff --git src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java index 45aed4d..456eb77 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java @@ -184,8 +184,8 @@ public class CreateRandomStoreFile { } StoreFile.Writer sfw = StoreFile.createWriter(fs, outputDir, blockSize, - compr, KeyValue.COMPARATOR, conf, new CacheConfig(conf), bloomType, - numKV); + compr, null, KeyValue.COMPARATOR, conf, new CacheConfig(conf), + bloomType, numKV); rand = new Random(); LOG.info("Writing " + numKV + " key/value pairs"); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java new file mode 100644 index 0000000..ed5b945 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java @@ -0,0 +1,585 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; + +/** + * Tests various algorithms for key compression on an existing HFile. Useful + * for testing, debugging and benchmarking. + */ +public class DataBlockEncodingTool { + private static final Log LOG = LogFactory.getLog( + DataBlockEncodingTool.class); + + private static final boolean includesMemstoreTS = true; + + /** + * How many times should benchmark run. + * More times means better data in terms of statistics. + * It has to be larger than BENCHMARK_N_OMIT. + */ + public static int BENCHMARK_N_TIMES = 12; + + /** + * How many first runs should omit benchmark. + * Usually it is one in order to exclude setup cost. + * Has to be 0 or larger. + */ + public static int BENCHMARK_N_OMIT = 2; + + private List codecs = new ArrayList(); + private int totalPrefixLength = 0; + private int totalKeyLength = 0; + private int totalValueLength = 0; + private int totalKeyRedundancyLength = 0; + + final private String compressionAlgorithmName; + final private Algorithm compressionAlgorithm; + final private Compressor compressor; + final private Decompressor decompressor; + + /** + * @param compressionAlgorithmName What kind of algorithm should be used + * as baseline for comparison (e.g. lzo, gz). 
+ */ + public DataBlockEncodingTool(String compressionAlgorithmName) { + this.compressionAlgorithmName = compressionAlgorithmName; + this.compressionAlgorithm = Compression.getCompressionAlgorithmByName( + compressionAlgorithmName); + this.compressor = this.compressionAlgorithm.getCompressor(); + this.decompressor = this.compressionAlgorithm.getDecompressor(); + } + /** + * Check statistics for given HFile for different data block encoders. + * @param scanner Of file which will be compressed. + * @param kvLimit Maximal count of KeyValue which will be processed. + * @throws IOException thrown if scanner is invalid + */ + public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) + throws IOException { + scanner.seek(KeyValue.LOWESTKEY); + + KeyValue currentKv; + + byte[] previousKey = null; + byte[] currentKey; + + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + + for (DataBlockEncoder d : dataBlockEncoders) { + codecs.add(new EncodedDataBlock(d, includesMemstoreTS)); + } + + int j = 0; + while ((currentKv = scanner.next()) != null && j < kvLimit) { + // Iterates through key/value pairs + j++; + currentKey = currentKv.getKey(); + if (previousKey != null) { + for (int i = 0 ; i < previousKey.length && i < currentKey.length && + previousKey[i] == currentKey[i] ; ++i) { + totalKeyRedundancyLength++; + } + } + + for (EncodedDataBlock codec : codecs) { + codec.addKv(currentKv); + } + + previousKey = currentKey; + + totalPrefixLength += currentKv.getLength() - currentKv.getKeyLength() - + currentKv.getValueLength(); + totalKeyLength += currentKv.getKeyLength(); + totalValueLength += currentKv.getValueLength(); + } + } + + /** + * Verify if all data block encoders are working properly. + * + * @param scanner Of file which was compressed. + * @param kvLimit Maximal count of KeyValue which will be processed. + * @return true if all data block encoders compressed/decompressed correctly. 
+ * @throws IOException thrown if scanner is invalid + */ + public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit) + throws IOException { + KeyValue currentKv; + + scanner.seek(KeyValue.LOWESTKEY); + List> codecIterators = + new ArrayList>(); + for(EncodedDataBlock codec : codecs) { + codecIterators.add(codec.getIterator()); + } + + int j = 0; + while ((currentKv = scanner.next()) != null && j < kvLimit) { + // Iterates through key/value pairs + ++j; + for (Iterator it : codecIterators) { + KeyValue codecKv = it.next(); + if (codecKv == null || 0 != Bytes.compareTo( + codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(), + currentKv.getBuffer(), currentKv.getOffset(), + currentKv.getLength())) { + if (codecKv == null) { + LOG.error("There is a bug in codec " + it + + ": it returned a null KeyValue."); + } else { + int prefix = 0; + int limitLength = 2 * Bytes.SIZEOF_INT + + Math.min(codecKv.getLength(), currentKv.getLength()); + while (prefix < limitLength && + codecKv.getBuffer()[prefix + codecKv.getOffset()] == + currentKv.getBuffer()[prefix + currentKv.getOffset()]) { + prefix++; + } + + LOG.error("There is a bug in codec " + it.toString() + + "\n on element " + j + + "\n codecKv.getKeyLength() " + codecKv.getKeyLength() + + "\n codecKv.getValueLength() " + codecKv.getValueLength() + + "\n codecKv.getLength() " + codecKv.getLength() + + "\n currentKv.getKeyLength() " + currentKv.getKeyLength() + + "\n currentKv.getValueLength() " + currentKv.getValueLength() + + "\n currentKv.getLength() " + currentKv.getLength() + + "\n currentKv rowLength " + currentKv.getRowLength() + + " familyLength " + currentKv.getFamilyLength() + + " qualifierLength " + currentKv.getQualifierLength() + + "\n prefix " + prefix + + "\n codecKv '" + Bytes.toStringBinary(codecKv.getBuffer(), + codecKv.getOffset(), prefix) + "' diff '" + + Bytes.toStringBinary(codecKv.getBuffer(), + codecKv.getOffset() + prefix, codecKv.getLength() - + prefix) + "'" + + "\n currentKv '" + Bytes.toStringBinary( + currentKv.getBuffer(), + currentKv.getOffset(), prefix) + "' diff '" + + Bytes.toStringBinary(currentKv.getBuffer(), + currentKv.getOffset() + prefix, currentKv.getLength() - + prefix) + "'" + ); + } + return false; + } + } + } + + LOG.info("Verification was successful!"); + + return true; + } + + /** + * Benchmark codec's speed. + */ + public void benchmarkCodecs() { + int prevTotalSize = -1; + for (EncodedDataBlock codec : codecs) { + prevTotalSize = benchmarkEncoder(prevTotalSize, codec); + } + + byte[] buffer = codecs.get(0).getRawKeyValues(); + + benchmarkDefaultCompression(prevTotalSize, buffer); + } + + /** + * Benchmark compression/decompression throughput. + * @param previousTotalSize Total size used for verification. Use -1 if + * unknown. + * @param codec Tested encoder. + * @return Size of uncompressed data.
+ */ + private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) { + int prevTotalSize = previousTotalSize; + int totalSize = 0; + + // decompression time + List durations = new ArrayList(); + for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) { + totalSize = 0; + + Iterator it; + + it = codec.getIterator(); + + // count only the algorithm time, without memory allocations + // (except the first time) + final long startTime = System.nanoTime(); + while (it.hasNext()) { + totalSize += it.next().getLength(); + } + final long finishTime = System.nanoTime(); + if (itTime >= BENCHMARK_N_OMIT) { + durations.add(finishTime - startTime); + } + + if (prevTotalSize != -1 && prevTotalSize != totalSize) { + throw new IllegalStateException(String.format( + "Algorithm '%s' decoded data to a different size", codec.toString())); + } + prevTotalSize = totalSize; + } + + // compression time + List compressDurations = new ArrayList(); + for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) { + final long startTime = System.nanoTime(); + codec.doCompressData(); + final long finishTime = System.nanoTime(); + if (itTime >= BENCHMARK_N_OMIT) { + compressDurations.add(finishTime - startTime); + } + } + + System.out.println(codec.toString() + ":"); + printBenchmarkResult(totalSize, compressDurations, false); + printBenchmarkResult(totalSize, durations, true); + + return prevTotalSize; + } + + private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) { + benchmarkAlgorithm(compressionAlgorithm, compressor, decompressor, + compressionAlgorithmName.toUpperCase(), rawBuffer, 0, totalSize); + } + + /** + * Benchmark compression and decompression performance of a given algorithm + * and print the results. + * @param algorithm Compression algorithm. + * @param compressorCodec Compressor to be tested. + * @param decompressorCodec Decompressor of the same algorithm. + * @param name Name of algorithm. + * @param buffer Buffer to be compressed. + * @param offset Position of the beginning of the data. + * @param length Length of data in buffer.
+ */ + public static void benchmarkAlgorithm( + Compression.Algorithm algorithm, + Compressor compressorCodec, + Decompressor decompressorCodec, + String name, + byte[] buffer, int offset, int length) { + System.out.println(name + ":"); + + // compress it + List compressDurations = new ArrayList(); + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + OutputStream compressingStream; + try { + for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) { + final long startTime = System.nanoTime(); + compressingStream = algorithm.createCompressionStream( + compressedStream, compressorCodec, 0); + compressingStream.write(buffer, offset, length); + compressingStream.flush(); + compressedStream.toByteArray(); + + final long finishTime = System.nanoTime(); + + // add time record + if (itTime >= BENCHMARK_N_OMIT) { + compressDurations.add(finishTime - startTime); + } + + if (itTime + 1 < BENCHMARK_N_TIMES) { // not the last one + compressedStream.reset(); + } + } + } catch (IOException e) { + throw new RuntimeException(String.format( + "Benchmarking of encoding algorithm '%s' caused a stream problem", + name), e); + } + printBenchmarkResult(length, compressDurations, false); + + + byte[] compBuffer = compressedStream.toByteArray(); + + // uncompress it several times and measure performance + List durations = new ArrayList(); + for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) { + final long startTime = System.nanoTime(); + byte[] newBuf = new byte[length + 1]; + + try { + + ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer, + 0, compBuffer.length); + InputStream decompressedStream = algorithm.createDecompressionStream( + downStream, decompressorCodec, 0); + + int destOffset = 0; + int nextChunk; + while ((nextChunk = decompressedStream.available()) > 0) { + destOffset += decompressedStream.read(newBuf, destOffset, nextChunk); + } + decompressedStream.close(); + + // iterate over the decoded KeyValues (parsing cost is part of the measurement) + KeyValue kv; + for (int pos = 0 ; pos < length ; pos += kv.getLength()) { + kv = new KeyValue(newBuf, pos); + } + + } catch (IOException e) { + throw new RuntimeException(String.format( + "Decoding path of algorithm '%s' caused an exception", name), e); + } + + final long finishTime = System.nanoTime(); + + // check correctness + if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) { + int prefix = 0; + for( ; prefix < buffer.length && prefix < newBuf.length ; ++prefix) { + if (buffer[prefix] != newBuf[prefix]) { + break; + } + } + throw new RuntimeException(String.format( + "Algorithm '%s' is corrupting the data", name)); + } + + // add time record + if (itTime >= BENCHMARK_N_OMIT) { + durations.add(finishTime - startTime); + } + } + printBenchmarkResult(length, durations, true); + } + + private static void printBenchmarkResult(int totalSize, + List durationsInNanoSec, boolean isDecompression) { + long meanTime = 0; + for (long time : durationsInNanoSec) { + meanTime += time; + } + meanTime /= durationsInNanoSec.size(); + + long standardDev = 0; + for (long time : durationsInNanoSec) { + standardDev += (time - meanTime) * (time - meanTime); + } + standardDev = (long) Math.sqrt(standardDev / durationsInNanoSec.size()); + + final double nanosPerSec = 1000.0 * 1000.0 * 1000.0; + double mbPerSec = (totalSize * nanosPerSec) / (1024.0 * 1024.0 * meanTime); + double mbPerSecDev = (totalSize * nanosPerSec) / + (1024.0 * 1024.0 * (meanTime - standardDev)); + + System.out.println(String.format( + " %s performance:%s %6.2f MB/s (+/- %.2f MB/s)", + isDecompression ?
"Decompression" : "Compression", + isDecompression ? "" : " ", + mbPerSec, mbPerSecDev - mbPerSec)); + } + + /** + * Display statistics of different compression algorithms. + */ + public void displayStatistics() { + int totalLength = totalPrefixLength + totalKeyLength + totalValueLength; + compressor.reset(); + + for(EncodedDataBlock codec : codecs) { + System.out.println(codec.toString()); + int saved = totalKeyLength + totalPrefixLength + totalValueLength + - codec.getSize(); + System.out.println( + String.format(" Saved bytes: %8d", saved)); + double keyRatio = (saved * 100.0) / (totalPrefixLength + totalKeyLength); + double allRatio = (saved * 100.0) / totalLength; + System.out.println( + String.format(" Key compression ratio: %.2f %%", keyRatio)); + System.out.println( + String.format(" All compression ratio: %.2f %%", allRatio)); + int compressedSize = codec.checkCompressedSize(compressor); + System.out.println( + String.format(" %s compressed size: %8d", + compressionAlgorithmName.toUpperCase(), compressedSize)); + double lzoRatio = 100.0 * (1.0 - compressedSize / (0.0 + totalLength)); + System.out.println( + String.format(" %s compression ratio: %.2f %%", + compressionAlgorithmName.toUpperCase(), lzoRatio)); + } + + System.out.println( + String.format("Total KV prefix length: %8d", totalPrefixLength)); + System.out.println( + String.format("Total key length: %8d", totalKeyLength)); + System.out.println( + String.format("Total key redundancy: %8d", + totalKeyRedundancyLength)); + System.out.println( + String.format("Total value length: %8d", totalValueLength)); + } + + /** + * Test a data block encoder on the given HFile. Output results to console. + * @param kvLimit The limit of KeyValue which will be analyzed. + * @param hfilePath an HFile path on the file system. + * @param compressionName Compression algorithm used for comparison. + * @param doBenchmark Run performance benchmarks. + * @param doVerify Verify correctness. + * @throws IOException When pathName is incorrect. 
+ */ + public static void testCodecs(int kvLimit, String hfilePath, + String compressionName, boolean doBenchmark, boolean doVerify) + throws IOException { + // create environment + Path path = new Path(hfilePath); + Configuration conf = HBaseConfiguration.create(); + CacheConfig cacheConf = new CacheConfig(conf); + FileSystem fs = FileSystem.get(conf); + StoreFile hsf = new StoreFile(fs, path, conf, cacheConf, + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); + + StoreFile.Reader reader = hsf.createReader(); + reader.loadFileInfo(); + KeyValueScanner scanner = reader.getStoreFileScanner(true, true); + + // run the utilities + DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); + comp.checkStatistics(scanner, kvLimit); + if (doVerify) { + comp.verifyCodecs(scanner, kvLimit); + } + if (doBenchmark) { + comp.benchmarkCodecs(); + } + comp.displayStatistics(); + + // cleanup + scanner.close(); + reader.close(cacheConf.shouldEvictOnClose()); + } + + private static void printUsage(Options options) { + System.err.println("Usage:"); + System.err.println(String.format("./hbase %s ", + DataBlockEncodingTool.class.getName())); + System.err.println("Options:"); + for (Object it : options.getOptions()) { + Option opt = (Option) it; + if (opt.hasArg()) { + System.err.println(String.format("-%s %s: %s", opt.getOpt(), + opt.getArgName(), opt.getDescription())); + } else { + System.err.println(String.format("-%s: %s", opt.getOpt(), + opt.getDescription())); + } + } + } + + /** + * A command line interface to benchmarks. + * @param args Should have length of at least 1 and hold the file path to an HFile. + * @throws IOException If you specified the wrong file. + */ + public static void main(final String[] args) throws IOException { + // set up user arguments + Options options = new Options(); + options.addOption("f", true, "HFile to analyse (REQUIRED)"); + options.getOption("f").setArgName("FILENAME"); + options.addOption("n", true, + "Limit the number of KeyValues which will be analysed"); + options.getOption("n").setArgName("NUMBER"); + options.addOption("b", false, "Measure read throughput"); + options.addOption("c", false, "Omit correctness tests."); + options.addOption("a", true, + "What kind of compression algorithm to use for comparison."); + + // parse arguments + CommandLineParser parser = new PosixParser(); + CommandLine cmd = null; + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + System.err.println("Could not parse arguments!"); + System.exit(-1); + return; // avoid warning + } + + int kvLimit = Integer.MAX_VALUE; + if (cmd.hasOption("n")) { + kvLimit = Integer.parseInt(cmd.getOptionValue("n")); + } + + // basic argument sanity checks + if (!cmd.hasOption("f")) { + System.err.println("ERROR: Filename is required!"); + printUsage(options); + System.exit(-1); + } + + if (!(new File(cmd.getOptionValue("f"))).exists()) { + System.err.println(String.format("ERROR: file '%s' doesn't exist!", + cmd.getOptionValue("f"))); + printUsage(options); + System.exit(-1); + } + + String pathName = cmd.getOptionValue("f"); + String compressionName = "gz"; + if (cmd.hasOption("a")) { + compressionName = cmd.getOptionValue("a"); + } + boolean doBenchmark = cmd.hasOption("b"); + boolean doVerify = !cmd.hasOption("c"); + + testCodecs(kvLimit, pathName, compressionName, doBenchmark, doVerify); + } + +} diff --git src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java
src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java new file mode 100644 index 0000000..d7b0334 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java @@ -0,0 +1,171 @@ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.LruBlockCache; +import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; + +/** + * Test seek performance for encoded data blocks. Read an HFile and do several + * random seeks. + */ +public class EncodedSeekPerformanceTest { + private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0; + private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0; + /** Default number of seeks which will be used in benchmark. */ + public static int DEFAULT_NUMBER_OF_SEEKS = 10000; + + private final HBaseTestingUtility testingUtility = new HBaseTestingUtility(); + private Configuration configuration = testingUtility.getConfiguration(); + private CacheConfig cacheConf = new CacheConfig(configuration); + private Random randomizer; + private int numberOfSeeks; + + /** Use this benchmark with default options */ + public EncodedSeekPerformanceTest() { + configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f); + randomizer = new Random(42l); + numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS; + } + + private List prepareListOfTestSeeks(Path path) throws IOException { + List allKeyValues = new ArrayList(); + + // read all of the key values + StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(), + path, configuration, cacheConf, BloomType.NONE, null); + + StoreFile.Reader reader = storeFile.createReader(); + StoreFileScanner scanner = reader.getStoreFileScanner(true, false); + KeyValue current; + + scanner.seek(KeyValue.LOWESTKEY); + while (null != (current = scanner.next())) { + allKeyValues.add(current); + } + + storeFile.closeReader(cacheConf.shouldEvictOnClose()); + + // pick seeks by random + List seeks = new ArrayList(); + for (int i = 0 ; i < numberOfSeeks ; ++i) { + KeyValue keyValue = allKeyValues.get( + randomizer.nextInt(allKeyValues.size())); + seeks.add(keyValue); + } + + clearBlockCache(); + + return seeks; + } + + private void runTest(Path path, HFileDataBlockEncoder blockEncoder, + List seeks) throws IOException { + // read all of the key values + StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(), + path, configuration, cacheConf, BloomType.NONE, blockEncoder); + + int totalSize = 0; + + StoreFile.Reader reader = storeFile.createReader(); + StoreFileScanner scanner = reader.getStoreFileScanner(true, false); + + long startReadingTime = System.nanoTime(); + KeyValue current; + scanner.seek(KeyValue.LOWESTKEY); + while (null != (current = scanner.next())) { // just iterate it! 
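// (Editor's note, not part of the patch.) This full sequential scan feeds the
// "Read speed" figure printed below: totalSize accumulates the length of every
// KeyValue returned, and the nanoTime() delta around the loop is converted to
// MB/s using NANOSEC_IN_SEC and BYTES_IN_MEGABYTES.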
+ totalSize += current.getLength(); + } + long finishReadingTime = System.nanoTime(); + + // do seeks + long startSeeksTime = System.nanoTime(); + for (KeyValue keyValue : seeks) { + scanner.seek(keyValue); + KeyValue toVerify = scanner.next(); + if (!keyValue.equals(toVerify)) { + System.out.println(String.format("KeyValue doesn't match:\n" + + "Orig key: %s\n" + + "Ret key: %s", keyValue.getKeyString(), toVerify.getKeyString())); + break; + } + } + long finishSeeksTime = System.nanoTime(); + + // write some stats + double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) / + (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime)); + double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) / + (finishSeeksTime - startSeeksTime); + + storeFile.closeReader(cacheConf.shouldEvictOnClose()); + clearBlockCache(); + + System.out.println(blockEncoder); + System.out.println(String.format(" Read speed: %8.2f (MB/s)", + readInMbPerSec)); + System.out.println(String.format(" Seeks per second: %8.2f (#/s)", + seeksPerSec)); + } + + /** + * @param path Path to the HFile which will be used. + * @param encoders List of encoders which will be used for tests. + * @throws IOException if there is a bug while reading from disk + */ + public void runTests(Path path, List encoders) + throws IOException { + List seeks = prepareListOfTestSeeks(path); + + for (HFileDataBlockEncoder blockEncoder : encoders) { + runTest(path, blockEncoder, seeks); + } + } + + /** + * Command line interface: + * @param args Takes one argument: the name of the HFile to read. + * @throws IOException if there is a bug while reading from disk + */ + public static void main(final String[] args) throws IOException { + if (args.length < 1) { + printUsage(); + System.exit(-1); + } + + Path path = new Path(args[0]); + List encoders = + new ArrayList(); + + encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE)); + for (DataBlockEncoding encodingAlgo : DataBlockEncoding.values()) { + encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE, + encodingAlgo)); + } + + EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest(); + utility.runTests(path, encoders); + + System.exit(0); + } + + private static void printUsage() { + System.out.println("Usage: one argument, name of the HFile"); + } + + private void clearBlockCache() { + ((LruBlockCache) cacheConf.getBlockCache()).clearCache(); + } +} diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java index f5b4c3c..bdc5fe8 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.regionserver.compactions.CompactSelection; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.util.Bytes; @@ -67,7 +68,7 @@ public class TestCompactSelection extends TestCase { this.conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0); this.conf.setInt("hbase.hstore.compaction.min", minFiles); this.conf.setInt("hbase.hstore.compaction.max", maxFiles); - this.conf.setLong("hbase.hregion.memstore.flush.size", minSize); + this.conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, minSize);
this.conf.setLong("hbase.hstore.compaction.max.size", maxSize); this.conf.setFloat("hbase.hstore.compaction.ratio", 1.0F); @@ -102,7 +103,8 @@ public class TestCompactSelection extends TestCase { MockStoreFile(long length, boolean isRef) throws IOException { super(TEST_UTIL.getTestFileSystem(), TEST_FILE, TEST_UTIL.getConfiguration(), - new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE); + new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE); this.length = length; this.isRef = isRef; } diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index 71c1c4a..2f5a699 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -24,8 +24,10 @@ import static org.mockito.Mockito.spy; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; - +import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -33,20 +35,24 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; -import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.experimental.categories.Category; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -61,6 +67,7 @@ public class TestCompaction extends HBaseTestCase { private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); private HRegion r = null; + private HTableDescriptor htd = null; private Path compactionDir = null; private Path regionCompactionDir = null; private static final byte [] COLUMN_FAMILY = fam1; @@ -70,13 +77,12 @@ public class TestCompaction extends HBaseTestCase { private byte[] firstRowBytes, secondRowBytes, thirdRowBytes; final private byte[] col1, col2; - /** constructor */ public TestCompaction() throws Exception { super(); // Set cache flush size to 1MB - conf.setInt("hbase.hregion.memstore.flush.size", 1024*1024); + conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024*1024); conf.setInt("hbase.hregion.memstore.block.multiplier", 100); compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3); @@ -94,7 +100,7 @@ public class TestCompaction extends 
HBaseTestCase { @Override public void setUp() throws Exception { super.setUp(); - HTableDescriptor htd = createTableDescriptor(getName()); + this.htd = createTableDescriptor(getName()); this.r = createNewHRegion(htd, null, null); } @@ -146,6 +152,42 @@ public class TestCompaction extends HBaseTestCase { * @throws Exception */ public void testMajorCompaction() throws Exception { + majorCompaction(); + } + + public void testDataBlockEncodingInCacheOnly() throws Exception { + majorCompactionWithDataBlockEncoding(true); + } + + public void testDataBlockEncodingEverywhere() throws Exception { + majorCompactionWithDataBlockEncoding(false); + } + + public void majorCompactionWithDataBlockEncoding(boolean inCacheOnly) + throws Exception { + Map replaceBlockCache = + new HashMap(); + for (Entry pair : r.getStores().entrySet()) { + Store store = pair.getValue(); + HFileDataBlockEncoder blockEncoder = store.getDataBlockEncoder(); + replaceBlockCache.put(pair.getValue(), blockEncoder); + final DataBlockEncoding inCache = DataBlockEncoding.PREFIX; + final DataBlockEncoding onDisk = inCacheOnly ? DataBlockEncoding.NONE : + inCache; + store.setDataBlockEncoderInTest(new HFileDataBlockEncoderImpl( + onDisk, inCache)); + } + + majorCompaction(); + + // restore settings + for (Entry entry : + replaceBlockCache.entrySet()) { + entry.getKey().setDataBlockEncoderInTest(entry.getValue()); + } + } + + private void majorCompaction() throws Exception { createStoreFile(r); for (int i = 0; i < compactionThreshold; i++) { createStoreFile(r); @@ -174,10 +216,10 @@ public class TestCompaction extends HBaseTestCase { CompactionProgress progress = store.getCompactionProgress(); if( progress != null ) { ++storeCount; - assert(progress.currentCompactedKVs > 0); - assert(progress.totalCompactingKVs > 0); + assertTrue(progress.currentCompactedKVs > 0); + assertTrue(progress.totalCompactingKVs > 0); } - assert(storeCount > 0); + assertTrue(storeCount > 0); } // look at the second row @@ -186,14 +228,20 @@ public class TestCompaction extends HBaseTestCase { secondRowBytes[START_KEY_BYTES.length - 1]++; // Always 3 versions if that is what max versions is. - result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null); - assertEquals(compactionThreshold, result.size()); + result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT). + setMaxVersions(100), null); + LOG.debug("Row " + Bytes.toStringBinary(secondRowBytes) + " after " + + "initial compaction: " + result); + assertEquals("Invalid number of versions of row " + + Bytes.toStringBinary(secondRowBytes) + ".", compactionThreshold, + result.size()); // Now add deletes to memstore and then flush it. // That will put us over // the compaction threshold of 3 store files. Compacting these store files // should result in a compacted store file that has no references to the // deleted row. 
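[Illustrative note, not part of the patch] The two new TestCompaction cases above differ only in whether encoding is applied on disk as well as in the block cache. A minimal sketch of that mapping, reusing the two-argument HFileDataBlockEncoderImpl constructor the test itself calls; the class and method names here are invented for illustration.

import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;

public class EncoderChoiceSketch {
  /**
   * Same on-disk/in-cache choice the test applies to every store:
   * PREFIX encoding in the block cache, and either NONE or PREFIX on disk
   * depending on the "cache only" flag.
   */
  static HFileDataBlockEncoder encoderFor(boolean inCacheOnly) {
    DataBlockEncoding inCache = DataBlockEncoding.PREFIX;
    DataBlockEncoding onDisk = inCacheOnly ? DataBlockEncoding.NONE : inCache;
    return new HFileDataBlockEncoderImpl(onDisk, inCache);
  }
}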
+ LOG.debug("Adding deletes to memstore and flushing"); Delete delete = new Delete(secondRowBytes, System.currentTimeMillis(), null); byte [][] famAndQf = {COLUMN_FAMILY, null}; delete.deleteFamily(famAndQf[0]); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java index b7f392b..de39712 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2; import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; @@ -193,7 +194,8 @@ public class TestCompoundBloomFilter { private void readStoreFile(int t, BloomType bt, List kvs, Path sfPath) throws IOException { - StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt); + StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt, + NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader r = sf.createReader(); final boolean pread = true; // does not really matter StoreFileScanner scanner = r.getStoreFileScanner(true, pread); @@ -293,7 +295,7 @@ public class TestCompoundBloomFilter { cacheConf = new CacheConfig(conf); StoreFile.Writer w = StoreFile.createWriter(fs, - TEST_UTIL.getDataTestDir(), BLOCK_SIZES[t], null, null, conf, + TEST_UTIL.getDataTestDir(), BLOCK_SIZES[t], null, null, null, conf, cacheConf, bt, 0); assertTrue(w.hasGeneralBloom()); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java index f42f124..91b869e 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; @@ -76,7 +77,9 @@ public class TestFSErrorsExposed { writer, Bytes.toBytes("cf"), Bytes.toBytes("qual")); StoreFile sf = new StoreFile(fs, writer.getPath(), - util.getConfiguration(), cacheConf, StoreFile.BloomType.NONE); + util.getConfiguration(), cacheConf, StoreFile.BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE); + StoreFile.Reader reader = sf.createReader(); HFileScanner scanner = reader.getScanner(false, true); @@ -119,7 +122,8 @@ public class TestFSErrorsExposed { writer, Bytes.toBytes("cf"), Bytes.toBytes("qual")); StoreFile sf = new StoreFile(fs, writer.getPath(), util.getConfiguration(), - cacheConf, BloomType.NONE); + cacheConf, BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); + List scanners = StoreFileScanner.getScannersForStoreFiles( Collections.singletonList(sf), false, true, false); KeyValueScanner scanner = scanners.get(0); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java 
src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java index 18d8ba7..6927ea0 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java @@ -35,6 +35,7 @@ import java.util.Random; import java.util.Set; import java.util.TreeSet; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -45,6 +46,7 @@ import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; @@ -107,8 +109,9 @@ public class TestMultiColumnScanner { private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); - private Compression.Algorithm comprAlgo; - private StoreFile.BloomType bloomType; + private final Compression.Algorithm comprAlgo; + private final StoreFile.BloomType bloomType; + private final DataBlockEncoding dataBlockEncoding; // Some static sanity-checking. static { @@ -127,20 +130,30 @@ public class TestMultiColumnScanner { @Parameters public static final Collection parameters() { - return HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS; + List parameters = new ArrayList(); + for (Object[] bloomAndCompressionParams : + HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) { + for (boolean useDataBlockEncoding : new boolean[]{false, true}) { + parameters.add(ArrayUtils.add(bloomAndCompressionParams, + useDataBlockEncoding)); + } + } + return parameters; } public TestMultiColumnScanner(Compression.Algorithm comprAlgo, - StoreFile.BloomType bloomType) { + StoreFile.BloomType bloomType, boolean useDataBlockEncoding) { this.comprAlgo = comprAlgo; this.bloomType = bloomType; + this.dataBlockEncoding = useDataBlockEncoding ? 
DataBlockEncoding.PREFIX : + DataBlockEncoding.NONE; } @Test public void testMultiColumnScanner() throws IOException { HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME, FAMILY, comprAlgo, bloomType, MAX_VERSIONS, HColumnDescriptor.DEFAULT_BLOCKCACHE, - HFile.DEFAULT_BLOCKSIZE); + HFile.DEFAULT_BLOCKSIZE, dataBlockEncoding); List rows = sequentialStrings("row", NUM_ROWS); List qualifiers = sequentialStrings("qual", NUM_COLUMNS); List kvs = new ArrayList(); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java index 539ba13..3a8ae91 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.KeyValueTestUtil; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter; @@ -105,7 +106,8 @@ public class TestScanWithBloomError { region = TEST_UTIL.createTestRegion(TABLE_NAME, FAMILY, Compression.Algorithm.GZ, bloomType, TestMultiColumnScanner.MAX_VERSIONS, - HColumnDescriptor.DEFAULT_BLOCKCACHE, HFile.DEFAULT_BLOCKSIZE); + HColumnDescriptor.DEFAULT_BLOCKCACHE, HFile.DEFAULT_BLOCKSIZE, + DataBlockEncoding.NONE); createStoreFile(new int[] {1, 2, 6}); createStoreFile(new int[] {1, 2, 3, 7}); createStoreFile(new int[] {1, 9}); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java index 1d7aa33..a902816 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.Bytes; @@ -144,7 +145,7 @@ public class TestSeekOptimizations { region = TEST_UTIL.createTestRegion( TestSeekOptimizations.class.getName(), FAMILY, comprAlgo, bloomType, Integer.MAX_VALUE, HColumnDescriptor.DEFAULT_BLOCKCACHE, - HFile.DEFAULT_BLOCKSIZE); + HFile.DEFAULT_BLOCKSIZE, DataBlockEncoding.NONE); // Delete the given timestamp and everything before. final long latestDelTS = USE_MANY_STORE_FILES ? 
1397 : -1; diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java index 37fa344..037cf27 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java @@ -34,18 +34,24 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.Reference.Range; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheStats; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; +import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hdfs.MiniDFSCluster; import org.junit.experimental.categories.Category; import org.mockito.Mockito; @@ -88,7 +94,7 @@ public class TestStoreFile extends HBaseTestCase { conf, cacheConf); writeStoreFile(writer); checkHalfHFile(new StoreFile(this.fs, writer.getPath(), conf, cacheConf, - StoreFile.BloomType.NONE)); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE)); } private void writeStoreFile(final StoreFile.Writer writer) throws IOException { @@ -129,7 +135,7 @@ public class TestStoreFile extends HBaseTestCase { conf, cacheConf); writeStoreFile(writer); StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader reader = hsf.createReader(); // Split on a row, not in middle of row. Midkey returned by reader // may be in middle of row. Create new one with empty column and @@ -141,7 +147,7 @@ public class TestStoreFile extends HBaseTestCase { // Make a reference Path refPath = StoreFile.split(fs, dir, hsf, midRow, Range.top); StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); // Now confirm that I can read from the reference and that it only gets // keys from top half of the file. HFileScanner s = refHsf.createReader().getScanner(false, false); @@ -177,10 +183,12 @@ public class TestStoreFile extends HBaseTestCase { Path bottomPath = StoreFile.split(this.fs, bottomDir, f, midRow, Range.bottom); // Make readers on top and bottom. 
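[Illustrative note, not part of the patch] Every StoreFile construction in TestStoreFile now passes a data block encoder explicitly. A condensed sketch of the pattern used for the half-file readers that follow, with an invented class name and the file-system objects taken as parameters; the no-op encoder leaves blocks unencoded.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.regionserver.StoreFile;

public class StoreFileOpenSketch {
  /** Open a store file with an explicit (no-op) encoder and return a scanner. */
  static HFileScanner open(FileSystem fs, Path path, Configuration conf,
      CacheConfig cacheConf) throws IOException {
    StoreFile sf = new StoreFile(fs, path, conf, cacheConf,
        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
    StoreFile.Reader reader = sf.createReader();
    return reader.getScanner(false, false); // cacheBlocks=false, pread=false
  }
}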
- StoreFile.Reader top = new StoreFile(this.fs, topPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); - StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); + StoreFile.Reader top = + new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); + StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, + conf, cacheConf, BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); ByteBuffer previous = null; LOG.info("Midkey: " + midKV.toString()); ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey); @@ -236,9 +244,11 @@ public class TestStoreFile extends HBaseTestCase { bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey, Range.bottom); top = new StoreFile(this.fs, topPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); + StoreFile.BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); + StoreFile.BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); bottomScanner = bottom.getScanner(false, false); int count = 0; while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || @@ -281,9 +291,11 @@ public class TestStoreFile extends HBaseTestCase { bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey, Range.bottom); top = new StoreFile(this.fs, topPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); + StoreFile.BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, - StoreFile.BloomType.NONE).createReader(); + StoreFile.BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE).createReader(); first = true; bottomScanner = bottom.getScanner(false, false); while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || @@ -339,7 +351,8 @@ public class TestStoreFile extends HBaseTestCase { } writer.close(); - StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf); + StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, + DataBlockEncoding.NONE); reader.loadFileInfo(); reader.loadBloomfilter(); StoreFileScanner scanner = reader.getStoreFileScanner(false, false); @@ -378,10 +391,10 @@ public class TestStoreFile extends HBaseTestCase { // write the file Path f = new Path(ROOT_DIR, getName()); - StoreFile.Writer writer = new StoreFile.Writer(fs, f, - StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000); - + StoreFile.Writer writer = + new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL, + HFile.DEFAULT_COMPRESSION_ALGORITHM, null, conf, cacheConf, + KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000); bloomWriteRead(writer, fs); } @@ -398,7 +411,8 @@ public class TestStoreFile extends HBaseTestCase { StoreFile.Writer writer = new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, 2000); + null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, + 2000); // add delete family long now = System.currentTimeMillis(); @@ -410,7 +424,8 @@ public class TestStoreFile extends HBaseTestCase { } writer.close(); - StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf); + StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, + DataBlockEncoding.NONE); reader.loadFileInfo(); 
reader.loadBloomfilter(); @@ -465,7 +480,7 @@ public class TestStoreFile extends HBaseTestCase { StoreFile.Writer writer = new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, bt[x], expKeys[x]); + null, conf, cacheConf, KeyValue.COMPARATOR, bt[x], expKeys[x]); long now = System.currentTimeMillis(); for (int i = 0; i < rowCount*2; i += 2) { // rows @@ -482,7 +497,8 @@ public class TestStoreFile extends HBaseTestCase { } writer.close(); - StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf); + StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, + DataBlockEncoding.NONE); reader.loadFileInfo(); reader.loadBloomfilter(); StoreFileScanner scanner = reader.getStoreFileScanner(false, false); @@ -536,7 +552,8 @@ public class TestStoreFile extends HBaseTestCase { // this should not create a bloom because the max keys is too small StoreFile.Writer writer = new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000); + null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, + 2000); assertFalse(writer.hasGeneralBloom()); writer.close(); fs.delete(f, true); @@ -559,7 +576,7 @@ public class TestStoreFile extends HBaseTestCase { // because Java can't create a contiguous array > MAX_INT writer = new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, + null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, Integer.MAX_VALUE); assertFalse(writer.hasGeneralBloom()); writer.close(); @@ -664,7 +681,7 @@ public class TestStoreFile extends HBaseTestCase { writer.close(); StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader reader = hsf.createReader(); StoreFileScanner scanner = reader.getStoreFileScanner(false, false); TreeSet columns = new TreeSet(); @@ -707,7 +724,7 @@ public class TestStoreFile extends HBaseTestCase { Path pathCowOff = new Path(baseDir, "123456789"); StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3); StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); LOG.debug(hsf.getPath().toString()); // Read this file, we should see 3 misses @@ -729,7 +746,7 @@ public class TestStoreFile extends HBaseTestCase { Path pathCowOn = new Path(baseDir, "123456788"); writer = writeStoreFile(conf, cacheConf, pathCowOn, 3); hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); // Read this file, we should see 3 hits reader = hsf.createReader(); @@ -745,13 +762,13 @@ public class TestStoreFile extends HBaseTestCase { // Let's read back the two files to ensure the blocks exactly match hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader readerOne = hsf.createReader(); readerOne.loadFileInfo(); StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true); scannerOne.seek(KeyValue.LOWESTKEY); hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf, - StoreFile.BloomType.NONE); + 
StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader readerTwo = hsf.createReader(); readerTwo.loadFileInfo(); StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true); @@ -782,7 +799,7 @@ public class TestStoreFile extends HBaseTestCase { conf.setBoolean("hbase.rs.evictblocksonclose", true); cacheConf = new CacheConfig(conf); hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); reader = hsf.createReader(); reader.close(cacheConf.shouldEvictOnClose()); @@ -796,7 +813,7 @@ public class TestStoreFile extends HBaseTestCase { conf.setBoolean("hbase.rs.evictblocksonclose", false); cacheConf = new CacheConfig(conf); hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf, - StoreFile.BloomType.NONE); + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); reader = hsf.createReader(); reader.close(cacheConf.shouldEvictOnClose()); @@ -823,7 +840,8 @@ public class TestStoreFile extends HBaseTestCase { int blockSize = totalSize / numBlocks; StoreFile.Writer writer = new StoreFile.Writer(fs, path, blockSize, HFile.DEFAULT_COMPRESSION_ALGORITHM, - conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, 2000); + null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, + 2000); // We'll write N-1 KVs to ensure we don't write an extra block kvs.remove(kvs.size()-1); for (KeyValue kv : kvs) { @@ -834,6 +852,42 @@ public class TestStoreFile extends HBaseTestCase { return writer; } + /** + * Check if data block encoding information is saved correctly in HFile's + * file info. + */ + public void testDataBlockEncodingMetaData() throws IOException { + Path dir = new Path(new Path(this.testDir, "regionname"), "familyname"); + Path path = new Path(dir, "1234567890"); + + DataBlockEncoding dataBlockEncoderAlgo = + DataBlockEncoding.FAST_DIFF; + HFileDataBlockEncoder dataBlockEncoder = + new HFileDataBlockEncoderImpl( + dataBlockEncoderAlgo, + dataBlockEncoderAlgo); + cacheConf = new CacheConfig(conf); + StoreFile.Writer writer = new StoreFile.Writer(fs, + path, HFile.DEFAULT_BLOCKSIZE, + HFile.DEFAULT_COMPRESSION_ALGORITHM, + dataBlockEncoder, + conf, + cacheConf, + KeyValue.COMPARATOR, + StoreFile.BloomType.NONE, + 2000); + writer.close(); + + StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf, + cacheConf, BloomType.NONE, dataBlockEncoder); + StoreFile.Reader reader = storeFile.createReader(); + + Map fileInfo = reader.loadFileInfo(); + byte[] value = fileInfo.get(StoreFile.DATA_BLOCK_ENCODING); + + assertEquals(dataBlockEncoderAlgo.getNameInBytes(), value); + } + @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java index 6e89cc4..d4e4633 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java @@ -130,7 +130,8 @@ public class TestLogRolling { TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2); // We flush the cache after every 8192 bytes - TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.flush.size", 8192); + TEST_UTIL.getConfiguration().setInt( + HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192); // Increase the amount of time between client retries 
TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000); diff --git src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java index 59910bf..23d27fd 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java @@ -481,7 +481,7 @@ public class TestWALReplay { runWALSplit(newConf); FileSystem newFS = FileSystem.get(newConf); // 100k seems to make for about 4 flushes during HRegion#initialize. - newConf.setInt("hbase.hregion.memstore.flush.size", 1024 * 100); + newConf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 100); // Make a new wal for new region. HLog newWal = createWAL(newConf); final AtomicInteger flushcount = new AtomicInteger(0); diff --git src/test/java/org/apache/hadoop/hbase/util/LoadTestKVGenerator.java src/test/java/org/apache/hadoop/hbase/util/LoadTestKVGenerator.java index ba38f6a..ba125a6 100644 --- src/test/java/org/apache/hadoop/hbase/util/LoadTestKVGenerator.java +++ src/test/java/org/apache/hadoop/hbase/util/LoadTestKVGenerator.java @@ -68,7 +68,7 @@ public class LoadTestKVGenerator { String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey)); // flip the key to randomize - return md5hash + ":" + stringKey; + return md5hash + "-" + stringKey; } /** diff --git src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java index 2e175da..5d30df7 100644 --- src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java +++ src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.PerformanceEvaluation; import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -70,8 +71,18 @@ public class LoadTestTool extends AbstractHBaseTool { private static final String OPT_USAGE_COMPRESSION = "Compression type, " + "one of " + Arrays.toString(Compression.Algorithm.values()); + public static final String OPT_DATA_BLOCK_ENCODING_USAGE = + "Encoding algorithm (e.g. 
prefix " + + "compression) to use for data blocks in the test column family, " + + "one of " + Arrays.toString(DataBlockEncoding.values()) + "."; + private static final String OPT_BLOOM = "bloom"; private static final String OPT_COMPRESSION = "compression"; + public static final String OPT_DATA_BLOCK_ENCODING = + HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase(); + private static final String OPT_ENCODE_IN_CACHE_ONLY = + HColumnDescriptor.ENCODE_ON_DISK.toLowerCase(); + private static final String OPT_KEY_WINDOW = "key_window"; private static final String OPT_WRITE = "write"; private static final String OPT_MAX_READ_ERRORS = "max_read_errors"; @@ -82,6 +93,8 @@ public class LoadTestTool extends AbstractHBaseTool { private static final String OPT_TABLE_NAME = "tn"; private static final String OPT_ZK_QUORUM = "zk"; + private static final long DEFAULT_START_KEY = 0; + /** This will be removed as we factor out the dependency on command line */ private CommandLine cmd; @@ -92,6 +105,10 @@ public class LoadTestTool extends AbstractHBaseTool { private boolean isWrite, isRead; + // Column family options + private DataBlockEncoding dataBlockEncodingAlgo; + private boolean encodeInCacheOnly; + // Writer options private int numWriterThreads = DEFAULT_NUM_THREADS; private long minColsPerKey, maxColsPerKey; @@ -104,13 +121,6 @@ public class LoadTestTool extends AbstractHBaseTool { private int maxReadErrors = MultiThreadedReader.DEFAULT_MAX_ERRORS; private int verifyPercent; - /** Create tables if needed. */ - public void createTables() throws IOException { - HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName, - COLUMN_FAMILY); - applyBloomFilterAndCompression(tableName, COLUMN_FAMILIES); - } - private String[] splitColonSeparated(String option, int minNumCols, int maxNumCols) { String optVal = cmd.getOptionValue(option); @@ -129,9 +139,10 @@ public class LoadTestTool extends AbstractHBaseTool { } /** - * Apply the given Bloom filter type to all column families we care about. + * Apply column family options such as Bloom filters, compression, and data + * block encoding. */ - private void applyBloomFilterAndCompression(byte[] tableName, + private void applyColumnFamilyOptions(byte[] tableName, byte[][] columnFamilies) throws IOException { String bloomStr = cmd.getOptionValue(OPT_BLOOM); StoreFile.BloomType bloomType = bloomStr == null ? null : @@ -141,19 +152,22 @@ public class LoadTestTool extends AbstractHBaseTool { Compression.Algorithm compressAlgo = compressStr == null ? 
null : Compression.Algorithm.valueOf(compressStr); - if (bloomStr == null && compressStr == null) - return; - HBaseAdmin admin = new HBaseAdmin(conf); HTableDescriptor tableDesc = admin.getTableDescriptor(tableName); LOG.info("Disabling table " + Bytes.toString(tableName)); admin.disableTable(tableName); for (byte[] cf : columnFamilies) { HColumnDescriptor columnDesc = tableDesc.getFamily(cf); - if (bloomStr != null) + if (bloomStr != null) { columnDesc.setBloomFilterType(bloomType); - if (compressStr != null) + } + if (compressStr != null) { columnDesc.setCompressionType(compressAlgo); + } + if (dataBlockEncodingAlgo != null) { + columnDesc.setDataBlockEncoding(dataBlockEncodingAlgo); + columnDesc.setEncodeOnDisk(!encodeInCacheOnly); + } admin.modifyColumn(tableName, columnDesc); } LOG.info("Enabling table " + Bytes.toString(tableName)); @@ -169,17 +183,23 @@ public class LoadTestTool extends AbstractHBaseTool { addOptWithArg(OPT_READ, OPT_USAGE_READ); addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM); addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION); + addOptWithArg(OPT_DATA_BLOCK_ENCODING, OPT_DATA_BLOCK_ENCODING_USAGE); addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " + "to tolerate before terminating all reader threads. The default is " + MultiThreadedReader.DEFAULT_MAX_ERRORS + "."); addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " + "reads and writes for concurrent write/read workload. The default " + "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + "."); + addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " + "separate puts for every column in a row"); + addOptNoArg(OPT_ENCODE_IN_CACHE_ONLY, "If this is specified, data " + + "blocks will only be encoded in block cache but not on disk"); addRequiredOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write"); - addRequiredOptWithArg(OPT_START_KEY, "The first key to read/write"); + addOptWithArg(OPT_START_KEY, "The first key to read/write " + + "(a 0-based index). The default value is " + + DEFAULT_START_KEY + "."); } @Override @@ -188,8 +208,8 @@ public class LoadTestTool extends AbstractHBaseTool { tableName = Bytes.toBytes(cmd.getOptionValue(OPT_TABLE_NAME, DEFAULT_TABLE_NAME)); - startKey = parseLong(cmd.getOptionValue(OPT_START_KEY), 0, - Long.MAX_VALUE); + startKey = parseLong(cmd.getOptionValue(OPT_START_KEY, + String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE); long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1, Long.MAX_VALUE - startKey); endKey = startKey + numKeys; @@ -202,6 +222,16 @@ public class LoadTestTool extends AbstractHBaseTool { "-" + OPT_READ + " has to be specified"); } + encodeInCacheOnly = cmd.hasOption(OPT_ENCODE_IN_CACHE_ONLY); + + String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING); + dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null : + DataBlockEncoding.valueOf(dataBlockEncodingStr); + if (dataBlockEncodingAlgo == DataBlockEncoding.NONE && encodeInCacheOnly) { + throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " + + "does not make sense when data block encoding is not used"); + } + if (isWrite) { String[] writeOpts = splitColonSeparated(OPT_WRITE, 2, 3); @@ -248,7 +278,7 @@ public class LoadTestTool extends AbstractHBaseTool { System.out.println("Reader threads: " + numReaderThreads); } - System.out.println("Key range: " + startKey + ".." + (endKey - 1)); + System.out.println("Key range: [" + startKey + ".." 
+ (endKey - 1) + "]"); } @Override @@ -257,7 +287,9 @@ public class LoadTestTool extends AbstractHBaseTool { conf.set(HConstants.ZOOKEEPER_QUORUM, cmd.getOptionValue(OPT_ZK_QUORUM)); } - createTables(); + HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName, + COLUMN_FAMILY, dataBlockEncodingAlgo); + applyColumnFamilyOptions(tableName, COLUMN_FAMILIES); if (isWrite) { writerThreads = new MultiThreadedWriter(conf, tableName, COLUMN_FAMILY); diff --git src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java index 4fd1451..a05a121 100644 --- src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java +++ src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java @@ -242,7 +242,7 @@ public class MultiThreadedReader extends MultiThreadedAction } public void queryKey(Get get, boolean verify) throws IOException { - String rowKey = new String(get.getRow()); + String rowKey = Bytes.toString(get.getRow()); // read the data long start = System.currentTimeMillis(); diff --git src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java index c2447b0..4bfc2a9 100644 --- src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java +++ src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java @@ -123,7 +123,6 @@ public class MultiThreadedWriter extends MultiThreadedAction { private class HBaseWriterThread extends Thread { private final HTable table; - private final int writerId; private final Random random = new Random(); private final LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator( @@ -132,7 +131,6 @@ public class MultiThreadedWriter extends MultiThreadedAction { public HBaseWriterThread(int writerId) throws IOException { setName(getClass().getSimpleName() + "_" + writerId); table = new HTable(conf, tableName); - this.writerId = writerId; } public void run() { @@ -166,7 +164,7 @@ public class MultiThreadedWriter extends MultiThreadedAction { public void insert(long rowKey, long col) { Put put = new Put(longToByteArrayKey(rowKey)); String colAsStr = String.valueOf(col); - put.add(columnFamily, colAsStr.getBytes(), + put.add(columnFamily, Bytes.toBytes(colAsStr), dataGenerator.generateRandomSizeValue(rowKey, colAsStr)); try { long start = System.currentTimeMillis(); diff --git src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java index 825846f..6b2a7f1 100644 --- src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java +++ src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; /** * A command-line tool that spins up a local process-based cluster, loads @@ -96,7 +97,7 @@ public class RestartMetaTest extends AbstractHBaseTool { // create tables if needed HBaseTestingUtility.createPreSplitLoadTestTable(conf, TABLE_NAME, - LoadTestTool.COLUMN_FAMILY); + LoadTestTool.COLUMN_FAMILY, DataBlockEncoding.NONE); LOG.debug("Loading data....\n\n"); loadData(); @@ -133,6 +134,8 @@ public class RestartMetaTest extends AbstractHBaseTool { protected void addOptions() { addRequiredOptWithArg(OPT_HBASE_HOME, "HBase home directory"); addOptWithArg(OPT_NUM_RS, 
"Number of Region Servers"); + addOptWithArg(LoadTestTool.OPT_DATA_BLOCK_ENCODING, + LoadTestTool.OPT_DATA_BLOCK_ENCODING_USAGE); } @Override diff --git src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java new file mode 100644 index 0000000..40a5948 --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.WritableUtils; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestByteBufferUtils { + + private byte[] array; + + /** + * Create an array with sample data. 
+ */ + @Before + public void setUp() { + array = new byte[8]; + for (int i = 0 ; i < array.length ; ++i) { + array[i] = (byte) ('a' + i); + } + } + + private static final int MAX_VLONG_LENGTH = 9; + private static final Collection testNumbers; + + private static void addNumber(Set a, long l) { + if (l != Long.MIN_VALUE) { + a.add(l - 1); + } + a.add(l); + if (l != Long.MAX_VALUE) { + a.add(l + 1); + } + for (long divisor = 3; divisor <= 10; ++divisor) { + for (long delta = -1; delta <= 1; ++delta) { + a.add(l / divisor + delta); + } + } + } + + static { + SortedSet a = new TreeSet(); + for (int i = 0; i <= 63; ++i) { + long v = (-1L) << i; + assertTrue(v < 0); + addNumber(a, v); + v = (1L << i) - 1; + assertTrue(v >= 0); + addNumber(a, v); + } + + testNumbers = Collections.unmodifiableSet(a); + System.err.println("Testing variable-length long serialization using: " + + testNumbers + " (count: " + testNumbers.size() + ")"); + assertEquals(1753, testNumbers.size()); + assertEquals(Long.MIN_VALUE, a.first().longValue()); + assertEquals(Long.MAX_VALUE, a.last().longValue()); + } + + @Test + public void testReadWriteVLong() { + for (long l : testNumbers) { + ByteBuffer b = ByteBuffer.allocate(MAX_VLONG_LENGTH); + ByteBufferUtils.writeVLong(b, l); + b.flip(); + assertEquals(l, ByteBufferUtils.readVLong(b)); + } + } + + @Test + public void testConsistencyWithHadoopVLong() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + for (long l : testNumbers) { + baos.reset(); + ByteBuffer b = ByteBuffer.allocate(MAX_VLONG_LENGTH); + ByteBufferUtils.writeVLong(b, l); + String bufStr = Bytes.toStringBinary(b.array(), + b.arrayOffset(), b.position()); + WritableUtils.writeVLong(dos, l); + String baosStr = Bytes.toStringBinary(baos.toByteArray()); + assertEquals(baosStr, bufStr); + } + } + + /** + * Test copying to stream from buffer. + */ + @Test + public void testCopyToStream() { + ByteBuffer buffer = ByteBuffer.wrap(array); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + try { + ByteBufferUtils.copyToStream(bos, buffer, array.length); + } catch (IOException e) { + fail("IOException in testCopyToStream()"); + } + assertArrayEquals(bos.toByteArray(), array); + } + + /** + * Test copying to stream one byte. + * @throws IOException On test failure. + */ + @Test + public void testCopyToStreamByte() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + for (int i = 0 ; i < array.length ; ++i) { + ByteBufferUtils.copyToStream(bos, array[i]); + } + byte[] actual = bos.toByteArray(); + for (int i = 0 ; i < array.length ; ++i) { + assertEquals(array[i], actual[i]); + } + } + + /** + * Test copying to stream from buffer with offset. + * @throws IOException On test failure. + */ + @Test + public void testCopyToStreamWithOffset() throws IOException { + ByteBuffer buffer = ByteBuffer.wrap(array); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + ByteBufferUtils.copyToStream(bos, buffer, array.length / 2, + array.length / 2); + + byte[] returnedArray = bos.toByteArray(); + for (int i = 0 ; i < array.length / 2 ; ++i) { + int pos = array.length / 2 + i; + assertEquals(returnedArray[i], array[pos]); + } + } + + /** + * Test copying data from stream. + * @throws IOException On test failure. 
+ */ + @Test + public void testCopyFromStream() throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(array.length); + ByteArrayInputStream bis = new ByteArrayInputStream(array); + DataInputStream dis = new DataInputStream(bis); + + ByteBufferUtils.copyFromStream(dis, buffer, array.length / 2); + ByteBufferUtils.copyFromStream(dis, buffer, + array.length - array.length / 2); + for (int i = 0 ; i < array.length ; ++i) { + assertEquals(array[i], buffer.get(i)); + } + } + + /** + * Test copying from buffer. + */ + @Test + public void testCopyFromBuffer() { + ByteBuffer srcBuffer = ByteBuffer.allocate(array.length); + ByteBuffer dstBuffer = ByteBuffer.allocate(array.length); + srcBuffer.put(array); + + ByteBufferUtils.copyFromBuffer(srcBuffer, dstBuffer, + array.length / 2, array.length / 4); + for (int i = 0 ; i < array.length / 4 ; ++i) { + assertEquals(srcBuffer.get(i + array.length / 2), + dstBuffer.get(i)); + } + } + + /** + * Test 7-bit encoding of integers. + * @throws IOException On test failure. + */ + @Test + public void testCompressedInt() throws IOException { + testCompressedInt(0); + testCompressedInt(Integer.MAX_VALUE); + testCompressedInt(Integer.MIN_VALUE); + + for (int i = 0 ; i < 3 ; i++) { + testCompressedInt((128 << i) - 1); + } + + for (int i = 0 ; i < 3 ; i++) { + testCompressedInt((128 << i)); + } + } + + /** + * Test fitting integer in less bytes. + */ + @Test + public void testFixedInt() { + testFixedInt(0, 1); + testFixedInt(Integer.MAX_VALUE, 4); + testFixedInt(Integer.MIN_VALUE, 4); + + for (int i = 0 ; i < 3 ; i++) { + testFixedInt((128 << i) - 1, i + 1); + } + + for (int i = 0 ; i < 3 ; i++) { + testFixedInt((128 << i), 2 + i); + } + } + + /** + * Test how much bytes we need to store integer. + */ + @Test + public void testIntFitsIn() { + assertEquals(1, ByteBufferUtils.intFitsIn(0)); + assertEquals(1, ByteBufferUtils.intFitsIn(1)); + assertEquals(2, ByteBufferUtils.intFitsIn(1 << 8)); + assertEquals(3, ByteBufferUtils.intFitsIn(1 << 16)); + assertEquals(4, ByteBufferUtils.intFitsIn(-1)); + assertEquals(4, ByteBufferUtils.intFitsIn(Integer.MAX_VALUE)); + assertEquals(4, ByteBufferUtils.intFitsIn(Integer.MIN_VALUE)); + } + + /** + * Test how much bytes we need to store long. + */ + @Test + public void testLongFitsIn() { + assertEquals(1, ByteBufferUtils.longFitsIn(0)); + assertEquals(1, ByteBufferUtils.longFitsIn(1)); + assertEquals(3, ByteBufferUtils.longFitsIn(1l << 16)); + assertEquals(5, ByteBufferUtils.longFitsIn(1l << 32)); + assertEquals(8, ByteBufferUtils.longFitsIn(-1)); + assertEquals(8, ByteBufferUtils.longFitsIn(Long.MIN_VALUE)); + assertEquals(8, ByteBufferUtils.longFitsIn(Long.MAX_VALUE)); + } + + /** + * Test if we are comparing equal bytes. 
+ */ + @Test + public void testArePartEqual() { + byte[] array = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3, 4 }; + ByteBuffer buffer = ByteBuffer.wrap(array); + assertTrue(ByteBufferUtils.arePartsEqual(buffer, 0, 4, 5, 4)); + assertTrue(ByteBufferUtils.arePartsEqual(buffer, 1, 2, 6, 2)); + assertFalse(ByteBufferUtils.arePartsEqual(buffer, 1, 2, 6, 3)); + assertFalse(ByteBufferUtils.arePartsEqual(buffer, 1, 3, 6, 2)); + assertFalse(ByteBufferUtils.arePartsEqual(buffer, 0, 3, 6, 3)); + } + + /** + * Test serializing int to bytes + */ + @Test + public void testPutInt() { + testPutInt(0); + testPutInt(Integer.MAX_VALUE); + + for (int i = 0 ; i < 3 ; i++) { + testPutInt((128 << i) - 1); + } + + for (int i = 0 ; i < 3 ; i++) { + testPutInt((128 << i)); + } + } + + // Utility methods invoked from test methods + + private void testCompressedInt(int value) throws IOException { + int parsedValue = 0; + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ByteBufferUtils.putCompressedInt(bos, value); + + ByteArrayInputStream bis = new ByteArrayInputStream( + bos.toByteArray()); + parsedValue = ByteBufferUtils.readCompressedInt(bis); + + assertEquals(value, parsedValue); + } + + private void testFixedInt(int value, int width) { + int parsedValue = 0; + byte[] bValue = new byte[Bytes.SIZEOF_INT]; + Bytes.putInt(bValue, 0, value); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + bos.write(bValue, Bytes.SIZEOF_INT - width, width); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + try { + parsedValue = ByteBufferUtils.readCompressedInt(bis, width); + } catch (IOException e) { + fail("While reading fixed int"); + } + + assertEquals(value, parsedValue); + } + + private void testPutInt(int value) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + ByteBufferUtils.putInt(baos, value); + } catch (IOException e) { + throw new RuntimeException("Bug in putIn()", e); + } + + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + DataInputStream dis = new DataInputStream(bais); + try { + assertEquals(dis.readInt(), value); + } catch (IOException e) { + throw new RuntimeException("Bug in test!", e); + } + } +} diff --git src/test/java/org/apache/hadoop/hbase/util/TestLoadTestKVGenerator.java src/test/java/org/apache/hadoop/hbase/util/TestLoadTestKVGenerator.java index 55948b8..43aad3b 100644 --- src/test/java/org/apache/hadoop/hbase/util/TestLoadTestKVGenerator.java +++ src/test/java/org/apache/hadoop/hbase/util/TestLoadTestKVGenerator.java @@ -66,7 +66,7 @@ public class TestLoadTestKVGenerator { for (int i = 0; i < 1000; ++i) { String k = LoadTestKVGenerator.md5PrefixedKey(i); assertFalse(keys.contains(k)); - assertTrue(k.endsWith(":" + i)); + assertTrue(k.endsWith("-" + i)); keys.add(k); } } diff --git src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadEncoded.java src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadEncoded.java new file mode 100644 index 0000000..c0326aa --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadEncoded.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.junit.experimental.categories.Category; +import org.junit.runners.Parameterized.Parameters; + +/** + * Runs a load test on a mini HBase cluster with data block encoding turned on. + * Compared to other load-test-style unit tests, this one writes a smaller + * amount of data, but goes through all available data block encoding + * algorithms. + */ +@Category(LargeTests.class) +public class TestMiniClusterLoadEncoded extends TestMiniClusterLoadParallel { + + /** We do not alternate the multi-put flag in this test. */ + private static final boolean USE_MULTI_PUT = true; + + @Parameters + public static Collection parameters() { + List parameters = new ArrayList(); + for (DataBlockEncoding dataBlockEncoding : DataBlockEncoding.values() ) { + parameters.add(new Object[]{dataBlockEncoding}); + } + return parameters; + } + + public TestMiniClusterLoadEncoded(DataBlockEncoding encoding) { + super(USE_MULTI_PUT, encoding); + } + + /** + * Use a smaller number of keys in in this test. + */ + @Override + protected int numKeys() { + return 3000; + } + +} diff --git src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java index ef7eeef..eab23d5 100644 --- src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java +++ src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util; import static org.junit.Assert.assertEquals; import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -33,18 +34,19 @@ import org.junit.runners.Parameterized; public class TestMiniClusterLoadParallel extends TestMiniClusterLoadSequential { - public TestMiniClusterLoadParallel(boolean isMultiPut) { - super(isMultiPut); + public TestMiniClusterLoadParallel(boolean isMultiPut, + DataBlockEncoding encoding) { + super(isMultiPut, encoding); } - @Test(timeout=120000) + @Test(timeout=TIMEOUT_MS) public void loadTest() throws Exception { prepareForLoadTest(); readerThreads.linkToWriter(writerThreads); - writerThreads.start(0, NUM_KEYS, NUM_THREADS); - readerThreads.start(0, NUM_KEYS, NUM_THREADS); + writerThreads.start(0, numKeys, NUM_THREADS); + readerThreads.start(0, numKeys, NUM_THREADS); writerThreads.waitForFinish(); readerThreads.waitForFinish(); @@ -52,7 +54,7 @@ public class TestMiniClusterLoadParallel assertEquals(0, writerThreads.getNumWriteFailures()); assertEquals(0, readerThreads.getNumReadFailures()); assertEquals(0, readerThreads.getNumReadErrors()); - assertEquals(NUM_KEYS, readerThreads.getNumUniqueKeysVerified()); + assertEquals(numKeys, readerThreads.getNumUniqueKeysVerified()); } } diff --git 
src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java index de7f473..c83387e 100644 --- src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java +++ src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java @@ -19,14 +19,18 @@ package org.apache.hadoop.hbase.util; import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -48,25 +52,37 @@ public class TestMiniClusterLoadSequential { protected static final byte[] TABLE = Bytes.toBytes("load_test_tbl"); protected static final byte[] CF = Bytes.toBytes("load_test_cf"); - protected static final long NUM_KEYS = 10000; protected static final int NUM_THREADS = 8; protected static final int NUM_RS = 2; + protected static final int TIMEOUT_MS = 120000; protected static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); protected final Configuration conf = TEST_UTIL.getConfiguration(); protected final boolean isMultiPut; + protected final DataBlockEncoding dataBlockEncoding; protected MultiThreadedWriter writerThreads; protected MultiThreadedReader readerThreads; + protected int numKeys; - public TestMiniClusterLoadSequential(boolean isMultiPut) { + public TestMiniClusterLoadSequential(boolean isMultiPut, + DataBlockEncoding dataBlockEncoding) { this.isMultiPut = isMultiPut; + this.dataBlockEncoding = dataBlockEncoding; + conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024); } @Parameters public static Collection parameters() { - return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + List parameters = new ArrayList(); + for (boolean multiPut : new boolean[]{false, true}) { + for (DataBlockEncoding dataBlockEncoding : new DataBlockEncoding[] { + DataBlockEncoding.NONE, DataBlockEncoding.PREFIX }) { + parameters.add(new Object[]{multiPut, dataBlockEncoding}); + } + } + return parameters; } @Before @@ -81,22 +97,25 @@ public class TestMiniClusterLoadSequential { TEST_UTIL.shutdownMiniCluster(); } - @Test(timeout=120000) + @Test(timeout=TIMEOUT_MS) public void loadTest() throws Exception { prepareForLoadTest(); - writerThreads.start(0, NUM_KEYS, NUM_THREADS); + writerThreads.start(0, numKeys, NUM_THREADS); writerThreads.waitForFinish(); assertEquals(0, writerThreads.getNumWriteFailures()); - readerThreads.start(0, NUM_KEYS, NUM_THREADS); + readerThreads.start(0, numKeys, NUM_THREADS); readerThreads.waitForFinish(); assertEquals(0, readerThreads.getNumReadFailures()); assertEquals(0, readerThreads.getNumReadErrors()); - assertEquals(NUM_KEYS, readerThreads.getNumKeysVerified()); + assertEquals(numKeys, readerThreads.getNumKeysVerified()); } protected void prepareForLoadTest() throws IOException { + LOG.info("Starting load test: dataBlockEncoding=" + dataBlockEncoding + + ", isMultiPut=" + isMultiPut); + numKeys = numKeys(); HBaseAdmin admin = new HBaseAdmin(conf); while (admin.getClusterStatus().getServers().size() < NUM_RS) { LOG.info("Sleeping 
until " + NUM_RS + " RSs are online"); @@ -104,8 +123,8 @@ public class TestMiniClusterLoadSequential { } admin.close(); - int numRegions = - HBaseTestingUtility.createPreSplitLoadTestTable(conf, TABLE, CF); + int numRegions = HBaseTestingUtility.createPreSplitLoadTestTable(conf, + TABLE, CF, dataBlockEncoding); TEST_UTIL.waitUntilAllRegionsAssigned(numRegions); writerThreads = new MultiThreadedWriter(conf, TABLE, CF); @@ -113,4 +132,8 @@ public class TestMiniClusterLoadSequential { readerThreads = new MultiThreadedReader(conf, TABLE, CF, 100); } + protected int numKeys() { + return 10000; + } + } -- 1.7.4.4
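[Illustrative note, not part of the patch] From the client side, the feature is enabled through the HColumnDescriptor setters that LoadTestTool's applyColumnFamilyOptions exercises above. A minimal sketch follows; the table name "t" and family "cf" are placeholders for an existing table, and the class name is invented.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class EnableEncodingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    byte[] tableName = Bytes.toBytes("t");   // assumed existing table
    byte[] family = Bytes.toBytes("cf");     // assumed existing column family

    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
    HColumnDescriptor columnDesc = tableDesc.getFamily(family);

    // PREFIX encoding in the block cache; also encode blocks on disk.
    columnDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX);
    columnDesc.setEncodeOnDisk(true);

    admin.disableTable(tableName);           // schema change needs a disabled table
    admin.modifyColumn(tableName, columnDesc);
    admin.enableTable(tableName);
  }
}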